use crate::{
    arch::asm,
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    intrinsics::{fmaf32, fmaf64},
    mem, ptr,
};

use core::hint::unreachable_unchecked;
#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi32&expand=39)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_abs_epi32(a: __m512i) -> __m512i {
    unsafe {
        let a: i32x16 = a.as_i32x16();
        let r: i32x16 = simd_select::<i32x16, _>(simd_lt(a, i32x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi32&expand=40)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let abs: i32x16 = _mm512_abs_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
    }
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
/// the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi32&expand=41)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let abs: i32x16 = _mm512_abs_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, abs, i32x16::ZERO))
    }
}
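
// Illustrative sketch, not part of the original source: contrasts the writemask and
// zeromask variants above. The lane values and the mask constant are assumptions
// chosen for the example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_vs_maskz_abs_epi32() -> (__m512i, __m512i) {
    let a = _mm512_set1_epi32(-7);
    let src = _mm512_set1_epi32(100);
    // Mask bits 0 and 1 are set, so lanes 0 and 1 become |-7| = 7. With the writemask
    // variant the remaining lanes are copied from `src` (100); with the zeromask
    // variant they are zeroed.
    let masked = _mm512_mask_abs_epi32(src, 0b0000_0000_0000_0011, a);
    let zeroed = _mm512_maskz_abs_epi32(0b0000_0000_0000_0011, a);
    (masked, zeroed)
}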

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi32&expand=37)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs: i32x8 = _mm256_abs_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, abs, src.as_i32x8()))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi32&expand=38)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs: i32x8 = _mm256_abs_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, abs, i32x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi32&expand=34)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs: i32x4 = _mm_abs_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, abs, src.as_i32x4()))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi32&expand=35)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs: i32x4 = _mm_abs_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, abs, i32x4::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi64&expand=48)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_abs_epi64(a: __m512i) -> __m512i {
    unsafe {
        let a: i64x8 = a.as_i64x8();
        let r: i64x8 = simd_select::<i64x8, _>(simd_lt(a, i64x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi64&expand=49)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let abs: i64x8 = _mm512_abs_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi64&expand=50)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let abs: i64x8 = _mm512_abs_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, abs, i64x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_abs_epi64(a: __m256i) -> __m256i {
    unsafe {
        let a: i64x4 = a.as_i64x4();
        let r: i64x4 = simd_select::<i64x4, _>(simd_lt(a, i64x4::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi64&expand=46)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs: i64x4 = _mm256_abs_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, abs, src.as_i64x4()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs: i64x4 = _mm256_abs_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, abs, i64x4::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_abs_epi64(a: __m128i) -> __m128i {
    unsafe {
        let a: i64x2 = a.as_i64x2();
        let r: i64x2 = simd_select::<i64x2, _>(simd_lt(a, i64x2::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs: i64x2 = _mm_abs_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, abs, src.as_i64x2()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs: i64x2 = _mm_abs_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, abs, i64x2::ZERO))
    }
}
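
// Illustrative sketch, not part of the original source: the 64-bit absolute-value
// intrinsics use the same select-and-negate pattern as the 32-bit ones. The negation
// is wrapping, so i64::MIN maps to itself, which matches the `vpabsq` instruction.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_abs_epi64_min_value() -> __m128i {
    // Both lanes hold i64::MIN; the result also holds i64::MIN in both lanes because
    // -(i64::MIN) wraps around in two's complement.
    let a = _mm_set1_epi64x(i64::MIN);
    _mm_abs_epi64(a)
}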

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm512_abs_ps(v2: __m512) -> __m512 {
    unsafe { simd_fabs(v2) }
}

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_ps&expand=66)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_pd&expand=60)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_abs_pd(v2: __m512d) -> __m512d {
    unsafe { simd_fabs(v2) }
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_pd&expand=61)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}
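
// Illustrative sketch, not part of the original source: the floating-point absolute
// values are bitwise operations that clear the sign bit (the assert_instr attributes
// above expect a plain AND), so -0.0 becomes +0.0. The inputs below are assumptions
// chosen for the example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_abs_ps() -> __m512 {
    let v2 = _mm512_set1_ps(-2.5);
    let src = _mm512_set1_ps(1.0);
    // Only the lowest eight lanes take |v2| = 2.5; the upper eight lanes keep 1.0.
    _mm512_mask_abs_ps(src, 0b0000_0000_1111_1111, v2)
}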

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi32&expand=3801)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let mov: i32x16 = a.as_i32x16();
        transmute(simd_select_bitmask(k, mov, src.as_i32x16()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi32&expand=3802)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let mov: i32x16 = a.as_i32x16();
        transmute(simd_select_bitmask(k, mov, i32x16::ZERO))
    }
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi32&expand=3799)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov: i32x8 = a.as_i32x8();
        transmute(simd_select_bitmask(k, mov, src.as_i32x8()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi32&expand=3800)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov: i32x8 = a.as_i32x8();
        transmute(simd_select_bitmask(k, mov, i32x8::ZERO))
    }
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi32&expand=3797)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov: i32x4 = a.as_i32x4();
        transmute(simd_select_bitmask(k, mov, src.as_i32x4()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi32&expand=3798)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov: i32x4 = a.as_i32x4();
        transmute(simd_select_bitmask(k, mov, i32x4::ZERO))
    }
}
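
// Illustrative sketch, not part of the original source: the masked "mov" intrinsics are
// lane-wise blends. For each lane i the result is a[i] if bit i of k is set, otherwise
// src[i] (writemask) or zero (zeromask).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_blend_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    // Equivalent to a k-register-driven blend of `a` over `src`.
    _mm512_mask_mov_epi32(src, k, a)
}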

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi64&expand=3807)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let mov: i64x8 = a.as_i64x8();
        transmute(simd_select_bitmask(k, mov, src.as_i64x8()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi64&expand=3808)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let mov: i64x8 = a.as_i64x8();
        transmute(simd_select_bitmask(k, mov, i64x8::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi64&expand=3805)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov: i64x4 = a.as_i64x4();
        transmute(simd_select_bitmask(k, mov, src.as_i64x4()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi64&expand=3806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov: i64x4 = a.as_i64x4();
        transmute(simd_select_bitmask(k, mov, i64x4::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi64&expand=3803)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov: i64x2 = a.as_i64x2();
        transmute(simd_select_bitmask(k, mov, src.as_i64x2()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi64&expand=3804)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov: i64x2 = a.as_i64x2();
        transmute(simd_select_bitmask(k, mov, i64x2::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_ps&expand=3825)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov: f32x16 = a.as_f32x16();
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_ps&expand=3826)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov: f32x16 = a.as_f32x16();
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_ps&expand=3823)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov: f32x8 = a.as_f32x8();
        transmute(simd_select_bitmask(k, mov, src.as_f32x8()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_ps&expand=3824)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov: f32x8 = a.as_f32x8();
        transmute(simd_select_bitmask(k, mov, f32x8::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_ps&expand=3821)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov: f32x4 = a.as_f32x4();
        transmute(simd_select_bitmask(k, mov, src.as_f32x4()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_ps&expand=3822)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov: f32x4 = a.as_f32x4();
        transmute(simd_select_bitmask(k, mov, f32x4::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_pd&expand=3819)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov: f64x8 = a.as_f64x8();
        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_pd&expand=3820)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov: f64x8 = a.as_f64x8();
        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_pd&expand=3817)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov: f64x4 = a.as_f64x4();
        transmute(simd_select_bitmask(k, mov, src.as_f64x4()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_pd&expand=3818)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov: f64x4 = a.as_f64x4();
        transmute(simd_select_bitmask(k, mov, f64x4::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_pd&expand=3815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov: f64x2 = a.as_f64x2();
        transmute(simd_select_bitmask(k, mov, src.as_f64x2()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_pd&expand=3816)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov: f64x2 = a.as_f64x2();
        transmute(simd_select_bitmask(k, mov, f64x2::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi32&expand=100)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i32x16(), b.as_i32x16())) }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi32&expand=101)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add: i32x16 = _mm512_add_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, add, src.as_i32x16()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi32&expand=102)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add: i32x16 = _mm512_add_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, add, i32x16::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi32&expand=98)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add: i32x8 = _mm256_add_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, add, src.as_i32x8()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi32&expand=99)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add: i32x8 = _mm256_add_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, add, i32x8::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi32&expand=95)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add: i32x4 = _mm_add_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, add, src.as_i32x4()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi32&expand=96)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add: i32x4 = _mm_add_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, add, i32x4::ZERO))
    }
}
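
// Illustrative sketch, not part of the original source: masked addition can act as a
// conditional accumulate, updating only selected lanes. Values and mask are assumptions
// chosen for the example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_add_epi32() -> __m512i {
    let acc = _mm512_set1_epi32(10);
    let delta = _mm512_set1_epi32(1);
    // Only even-numbered lanes are incremented; odd-numbered lanes keep their value.
    _mm512_mask_add_epi32(acc, 0b0101_0101_0101_0101, acc, delta)
}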

/// Add packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi64&expand=109)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i64x8(), b.as_i64x8())) }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi64&expand=110)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add: i64x8 = _mm512_add_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, add, src.as_i64x8()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi64&expand=111)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add: i64x8 = _mm512_add_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, add, i64x8::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi64&expand=107)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add: i64x4 = _mm256_add_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, add, src.as_i64x4()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi64&expand=108)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add: i64x4 = _mm256_add_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, add, i64x4::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi64&expand=104)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add: i64x2 = _mm_add_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, add, src.as_i64x2()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi64&expand=105)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add: i64x2 = _mm_add_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, add, i64x2::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_ps&expand=139)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_add(a.as_f32x16(), b.as_f32x16())) }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_ps&expand=140)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let add: f32x16 = _mm512_add_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, add, src.as_f32x16()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_ps&expand=141)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let add: f32x16 = _mm512_add_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, add, f32x16::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_ps&expand=137)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let add: f32x8 = _mm256_add_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, add, src.as_f32x8()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_ps&expand=138)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let add: f32x8 = _mm256_add_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, add, f32x8::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ps&expand=134)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let add: f32x4 = _mm_add_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, add, src.as_f32x4()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ps&expand=135)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let add: f32x4 = _mm_add_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, add, f32x4::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_pd&expand=127)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_add(a.as_f64x8(), b.as_f64x8())) }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_pd&expand=128)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let add: f64x8 = _mm512_add_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, add, src.as_f64x8()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_pd&expand=129)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let add: f64x8 = _mm512_add_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, add, f64x8::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_pd&expand=125)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let add: f64x4 = _mm256_add_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, add, src.as_f64x4()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_pd&expand=126)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let add: f64x4 = _mm256_add_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, add, f64x4::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_pd&expand=122)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let add: f64x2 = _mm_add_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, add, src.as_f64x2()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_pd&expand=123)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let add: f64x2 = _mm_add_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, add, f64x2::ZERO))
    }
}
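
// Illustrative sketch, not part of the original source: the zeromask variants are useful
// for loop tails, since lanes past the tail come out as zero. `live_lanes` is an assumed
// helper parameter and must be at most 8 here.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_add_pd_tail(a: __m512d, b: __m512d, live_lanes: u32) -> __m512d {
    // Build a mask with the `live_lanes` low bits set.
    let k = ((1u16 << live_lanes) - 1) as __mmask8;
    _mm512_maskz_add_pd(k, a, b)
}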

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi32&expand=5694)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i32x16(), b.as_i32x16())) }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi32&expand=5692)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub: i32x16 = _mm512_sub_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, sub, src.as_i32x16()))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi32&expand=5693)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub: i32x16 = _mm512_sub_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, sub, i32x16::ZERO))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi32&expand=5689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub: i32x8 = _mm256_sub_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, sub, src.as_i32x8()))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi32&expand=5690)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub: i32x8 = _mm256_sub_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, sub, i32x8::ZERO))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi32&expand=5686)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub: i32x4 = _mm_sub_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, sub, src.as_i32x4()))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi32&expand=5687)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub: i32x4 = _mm_sub_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, sub, i32x4::ZERO))
    }
}
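
// Illustrative sketch, not part of the original source: a masked subtract expressed as a
// conditional decrement. The counter value and mask are assumptions chosen for the example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mask_sub_epi32() -> __m256i {
    let counters = _mm256_set1_epi32(5);
    let one = _mm256_set1_epi32(1);
    // Decrement only the four lowest lanes; the upper four lanes keep their value.
    _mm256_mask_sub_epi32(counters, 0b0000_1111, counters, one)
}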

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi64&expand=5703)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i64x8(), b.as_i64x8())) }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi64&expand=5701)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub: i64x8 = _mm512_sub_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, sub, src.as_i64x8()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi64&expand=5702)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub: i64x8 = _mm512_sub_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, sub, i64x8::ZERO))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi64&expand=5698)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub: i64x4 = _mm256_sub_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, sub, src.as_i64x4()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi64&expand=5699)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub: i64x4 = _mm256_sub_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, sub, i64x4::ZERO))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi64&expand=5695)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub: i64x2 = _mm_sub_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, sub, src.as_i64x2()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi64&expand=5696)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub: i64x2 = _mm_sub_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, sub, i64x2::ZERO))
    }
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
1196///
1197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_ps&expand=5733)
1198#[inline]
1199#[target_feature(enable = "avx512f")]
1200#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1201#[cfg_attr(test, assert_instr(vsubps))]
1202pub fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
1203 unsafe { transmute(src:simd_sub(lhs:a.as_f32x16(), rhs:b.as_f32x16())) }
1204}
1205
1206/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1207///
1208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_ps&expand=5731)
1209#[inline]
1210#[target_feature(enable = "avx512f")]
1211#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1212#[cfg_attr(test, assert_instr(vsubps))]
1213pub fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1214 unsafe {
1215 let sub: f32x16 = _mm512_sub_ps(a, b).as_f32x16();
1216 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f32x16()))
1217 }
1218}
1219
1220/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1221///
1222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_ps&expand=5732)
1223#[inline]
1224#[target_feature(enable = "avx512f")]
1225#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1226#[cfg_attr(test, assert_instr(vsubps))]
1227pub fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1228 unsafe {
1229 let sub: f32x16 = _mm512_sub_ps(a, b).as_f32x16();
1230 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f32x16::ZERO))
1231 }
1232}
1233
1234/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1235///
1236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_ps&expand=5728)
1237#[inline]
1238#[target_feature(enable = "avx512f,avx512vl")]
1239#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1240#[cfg_attr(test, assert_instr(vsubps))]
1241pub fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1242 unsafe {
1243 let sub: f32x8 = _mm256_sub_ps(a, b).as_f32x8();
1244 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f32x8()))
1245 }
1246}
1247
1248/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1249///
1250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_ps&expand=5729)
1251#[inline]
1252#[target_feature(enable = "avx512f,avx512vl")]
1253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1254#[cfg_attr(test, assert_instr(vsubps))]
1255pub fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1256 unsafe {
1257 let sub: f32x8 = _mm256_sub_ps(a, b).as_f32x8();
1258 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f32x8::ZERO))
1259 }
1260}
1261
1262/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1263///
1264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ps&expand=5725)
1265#[inline]
1266#[target_feature(enable = "avx512f,avx512vl")]
1267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1268#[cfg_attr(test, assert_instr(vsubps))]
1269pub fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1270 unsafe {
1271 let sub: f32x4 = _mm_sub_ps(a, b).as_f32x4();
1272 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f32x4()))
1273 }
1274}
1275
1276/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1277///
1278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ps&expand=5726)
1279#[inline]
1280#[target_feature(enable = "avx512f,avx512vl")]
1281#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1282#[cfg_attr(test, assert_instr(vsubps))]
1283pub fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1284 unsafe {
1285 let sub: f32x4 = _mm_sub_ps(a, b).as_f32x4();
1286 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f32x4::ZERO))
1287 }
1288}
1289
1290/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
1291///
1292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_pd&expand=5721)
1293#[inline]
1294#[target_feature(enable = "avx512f")]
1295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1296#[cfg_attr(test, assert_instr(vsubpd))]
1297pub fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
1298 unsafe { transmute(src:simd_sub(lhs:a.as_f64x8(), rhs:b.as_f64x8())) }
1299}
1300
1301/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1302///
1303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_pd&expand=5719)
1304#[inline]
1305#[target_feature(enable = "avx512f")]
1306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1307#[cfg_attr(test, assert_instr(vsubpd))]
1308pub fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1309 unsafe {
1310 let sub: f64x8 = _mm512_sub_pd(a, b).as_f64x8();
1311 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f64x8()))
1312 }
1313}
1314
1315/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1316///
1317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_pd&expand=5720)
1318#[inline]
1319#[target_feature(enable = "avx512f")]
1320#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1321#[cfg_attr(test, assert_instr(vsubpd))]
1322pub fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1323 unsafe {
1324 let sub: f64x8 = _mm512_sub_pd(a, b).as_f64x8();
1325 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f64x8::ZERO))
1326 }
1327}
1328
1329/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1330///
1331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_pd&expand=5716)
1332#[inline]
1333#[target_feature(enable = "avx512f,avx512vl")]
1334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1335#[cfg_attr(test, assert_instr(vsubpd))]
1336pub fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1337 unsafe {
1338 let sub: f64x4 = _mm256_sub_pd(a, b).as_f64x4();
1339 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f64x4()))
1340 }
1341}
1342
1343/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1344///
1345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_pd&expand=5717)
1346#[inline]
1347#[target_feature(enable = "avx512f,avx512vl")]
1348#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1349#[cfg_attr(test, assert_instr(vsubpd))]
1350pub fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1351 unsafe {
1352 let sub: f64x4 = _mm256_sub_pd(a, b).as_f64x4();
1353 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f64x4::ZERO))
1354 }
1355}
1356
1357/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1358///
1359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_pd&expand=5713)
1360#[inline]
1361#[target_feature(enable = "avx512f,avx512vl")]
1362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1363#[cfg_attr(test, assert_instr(vsubpd))]
1364pub fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1365 unsafe {
1366 let sub: f64x2 = _mm_sub_pd(a, b).as_f64x2();
1367 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f64x2()))
1368 }
1369}
1370
1371/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1372///
1373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_pd&expand=5714)
1374#[inline]
1375#[target_feature(enable = "avx512f,avx512vl")]
1376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1377#[cfg_attr(test, assert_instr(vsubpd))]
1378pub fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1379 unsafe {
1380 let sub: f64x2 = _mm_sub_pd(a, b).as_f64x2();
1381 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f64x2::ZERO))
1382 }
1383}
1384
1385/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
1386///
1387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epi32&expand=3907)
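///
/// A minimal sketch of the lane semantics (illustrative only, not part of Intel's
/// documentation); it assumes an AVX-512F capable CPU and is not compiled as a doctest:
///
/// ```ignore
/// // The high 32 bits of each 64-bit lane are ignored; only the low signed
/// // 32-bit halves (7 and -3) take part in the multiply.
/// let a = _mm512_set1_epi64(0xFFFF_FFFF_0000_0007u64 as i64);
/// let b = _mm512_set1_epi64(-3); // low 32 bits are 0xFFFF_FFFD = -3
/// let r = _mm512_mul_epi32(a, b);
/// // every 64-bit lane of r holds 7 * -3 = -21
/// ```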
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a: i64x8 = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8()));
        let b: i64x8 = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8()));
        transmute(simd_mul(a, b))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epi32&expand=3905)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul: i64x8 = _mm512_mul_epi32(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epi32&expand=3906)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul: i64x8 = _mm512_mul_epi32(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, i64x8::ZERO))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epi32&expand=3902)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul: i64x4 = _mm256_mul_epi32(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, mul, src.as_i64x4()))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epi32&expand=3903)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul: i64x4 = _mm256_mul_epi32(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, mul, i64x4::ZERO))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epi32&expand=3899)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul: i64x2 = _mm_mul_epi32(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, mul, src.as_i64x2()))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epi32&expand=3900)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul: i64x2 = _mm_mul_epi32(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, mul, i64x2::ZERO))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi32&expand=4005)
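///
/// A minimal sketch of the truncating behaviour (illustrative only, not part of
/// Intel's documentation); it assumes an AVX-512F capable CPU and is not compiled
/// as a doctest:
///
/// ```ignore
/// let a = _mm512_set1_epi32(100_000);
/// let b = _mm512_set1_epi32(100_000);
/// let r = _mm512_mullo_epi32(a, b);
/// // 100_000 * 100_000 = 10_000_000_000 does not fit in 32 bits; only the low
/// // 32 bits are kept, so every lane is 10_000_000_000 mod 2^32 = 1_410_065_408
/// ```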
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_mul(a.as_i32x16(), b.as_i32x16())) }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi32&expand=4003)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm512_mask_mullo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul: i32x16 = _mm512_mullo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi32&expand=4004)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul: i32x16 = _mm512_mullo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, mul, i32x16::ZERO))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi32&expand=4000)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm256_mask_mullo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul: i32x8 = _mm256_mullo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, mul, src.as_i32x8()))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi32&expand=4001)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul: i32x8 = _mm256_mullo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, mul, i32x8::ZERO))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi32&expand=3997)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul: i32x4 = _mm_mullo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, mul, src.as_i32x4()))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi32&expand=3998)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul: i32x4 = _mm_mullo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, mul, i32x4::ZERO))
    }
}

/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullox_epi64&expand=4017)
///
/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
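///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation);
/// it assumes an AVX-512F capable CPU and is not compiled as a doctest:
///
/// ```ignore
/// let a = _mm512_set1_epi64(1 << 40);
/// let b = _mm512_set1_epi64(3);
/// let r = _mm512_mullox_epi64(a, b);
/// // every 64-bit lane is 3 * 2^40; without AVX-512DQ's vpmullq the multiply is
/// // typically lowered to a short sequence of 32-bit multiplies, shifts and adds
/// ```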
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
}

/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullox_epi64&expand=4016)
///
/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_mullox_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul: i64x8 = _mm512_mullox_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epu32&expand=3916)
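///
/// A minimal sketch of the lane semantics (illustrative only, not part of Intel's
/// documentation); it assumes an AVX-512F capable CPU and is not compiled as a doctest:
///
/// ```ignore
/// // Only the low unsigned 32 bits of each 64-bit lane are multiplied.
/// let a = _mm512_set1_epi64(u32::MAX as i64); // low half = 0xFFFF_FFFF
/// let b = _mm512_set1_epi64(2);
/// let r = _mm512_mul_epu32(a, b);
/// // every 64-bit lane is 0xFFFF_FFFF * 2 = 0x1_FFFF_FFFE (8_589_934_590)
/// ```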
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a: u64x8 = a.as_u64x8();
        let b: u64x8 = b.as_u64x8();
        let mask: u64x8 = u64x8::splat(u32::MAX.into());
        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epu32&expand=3914)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul: u64x8 = _mm512_mul_epu32(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epu32&expand=3915)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul: u64x8 = _mm512_mul_epu32(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, mul, u64x8::ZERO))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epu32&expand=3911)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul: u64x4 = _mm256_mul_epu32(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, mul, src.as_u64x4()))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epu32&expand=3912)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul: u64x4 = _mm256_mul_epu32(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, mul, u64x4::ZERO))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epu32&expand=3908)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul: u64x2 = _mm_mul_epu32(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, mul, src.as_u64x2()))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epu32&expand=3909)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul: u64x2 = _mm_mul_epu32(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, mul, u64x2::ZERO))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_ps&expand=3934)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_mul(a.as_f32x16(), b.as_f32x16())) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_ps&expand=3932)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let mul: f32x16 = _mm512_mul_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_ps&expand=3933)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let mul: f32x16 = _mm512_mul_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, mul, f32x16::ZERO))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_ps&expand=3929)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let mul: f32x8 = _mm256_mul_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, mul, src.as_f32x8()))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_ps&expand=3930)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let mul: f32x8 = _mm256_mul_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, mul, f32x8::ZERO))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ps&expand=3926)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mul: f32x4 = _mm_mul_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, mul, src.as_f32x4()))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ps&expand=3927)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mul: f32x4 = _mm_mul_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, mul, f32x4::ZERO))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_pd&expand=3925)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_mul(a.as_f64x8(), b.as_f64x8())) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_pd&expand=3923)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let mul: f64x8 = _mm512_mul_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_pd&expand=3924)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let mul: f64x8 = _mm512_mul_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, mul, f64x8::ZERO))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_pd&expand=3920)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let mul: f64x4 = _mm256_mul_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, mul, src.as_f64x4()))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_pd&expand=3921)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let mul: f64x4 = _mm256_mul_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, mul, f64x4::ZERO))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_pd&expand=3917)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mul: f64x2 = _mm_mul_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, mul, src.as_f64x2()))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_pd&expand=3918)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mul: f64x2 = _mm_mul_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, mul, f64x2::ZERO))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_ps&expand=2162)
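///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation);
/// it assumes an AVX-512F capable CPU and is not compiled as a doctest:
///
/// ```ignore
/// let a = _mm512_set1_ps(1.0);
/// let b = _mm512_set1_ps(8.0);
/// let r = _mm512_div_ps(a, b);
/// // every lane is 0.125; the division follows ordinary IEEE 754 rules, so a
/// // non-zero value divided by 0.0 yields infinity rather than faulting
/// ```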
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_div(a.as_f32x16(), b.as_f32x16())) }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_ps&expand=2163)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let div: f32x16 = _mm512_div_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, div, src.as_f32x16()))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_ps&expand=2164)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let div: f32x16 = _mm512_div_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, div, f32x16::ZERO))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_ps&expand=2160)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let div: f32x8 = _mm256_div_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, div, src.as_f32x8()))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_ps&expand=2161)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let div: f32x8 = _mm256_div_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, div, f32x8::ZERO))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ps&expand=2157)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let div: f32x4 = _mm_div_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, div, src.as_f32x4()))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_ps&expand=2158)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let div: f32x4 = _mm_div_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, div, f32x4::ZERO))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_pd&expand=2153)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_div(a.as_f64x8(), b.as_f64x8())) }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_pd&expand=2154)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let div: f64x8 = _mm512_div_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, div, src.as_f64x8()))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_pd&expand=2155)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let div: f64x8 = _mm512_div_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, div, f64x8::ZERO))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_pd&expand=2151)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let div: f64x4 = _mm256_div_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, div, src.as_f64x4()))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_pd&expand=2152)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let div: f64x4 = _mm256_div_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, div, f64x4::ZERO))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_pd&expand=2148)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let div: f64x2 = _mm_div_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, div, src.as_f64x2()))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_pd&expand=2149)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let div: f64x2 = _mm_div_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, div, f64x2::ZERO))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi32&expand=3582)
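///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation);
/// it assumes an AVX-512F capable CPU and is not compiled as a doctest:
///
/// ```ignore
/// let a = _mm512_set1_epi32(-5);
/// let b = _mm512_set1_epi32(3);
/// let r = _mm512_max_epi32(a, b);
/// // the comparison is signed, so every lane is 3
/// // (an unsigned maximum would instead pick -5 = 0xFFFF_FFFB)
/// ```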
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a: i32x16 = a.as_i32x16();
        let b: i32x16 = b.as_i32x16();
        transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi32&expand=3580)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max: i32x16 = _mm512_max_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, max, src.as_i32x16()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi32&expand=3581)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max: i32x16 = _mm512_max_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, max, i32x16::ZERO))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi32&expand=3577)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max: i32x8 = _mm256_max_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, max, src.as_i32x8()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi32&expand=3578)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max: i32x8 = _mm256_max_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, max, i32x8::ZERO))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi32&expand=3574)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max: i32x4 = _mm_max_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, max, src.as_i32x4()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi32&expand=3575)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max: i32x4 = _mm_max_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, max, i32x4::ZERO))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi64&expand=3591)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a: i64x8 = a.as_i64x8();
        let b: i64x8 = b.as_i64x8();
        transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi64&expand=3589)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max: i64x8 = _mm512_max_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, max, src.as_i64x8()))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2215///
2216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi64&expand=3590)
2217#[inline]
2218#[target_feature(enable = "avx512f")]
2219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2220#[cfg_attr(test, assert_instr(vpmaxsq))]
2221pub fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2222 unsafe {
2223 let max: i64x8 = _mm512_max_epi64(a, b).as_i64x8();
2224 transmute(src:simd_select_bitmask(m:k, yes:max, no:i64x8::ZERO))
2225 }
2226}
2227
2228/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2229///
2230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi64&expand=3588)
2231#[inline]
2232#[target_feature(enable = "avx512f,avx512vl")]
2233#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2234#[cfg_attr(test, assert_instr(vpmaxsq))]
2235pub fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
2236 unsafe {
2237 let a: i64x4 = a.as_i64x4();
2238 let b: i64x4 = b.as_i64x4();
2239 transmute(src:simd_select::<i64x4, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2240 }
2241}
2242
2243/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2244///
2245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi64&expand=3586)
2246#[inline]
2247#[target_feature(enable = "avx512f,avx512vl")]
2248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2249#[cfg_attr(test, assert_instr(vpmaxsq))]
2250pub fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2251 unsafe {
2252 let max: i64x4 = _mm256_max_epi64(a, b).as_i64x4();
2253 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i64x4()))
2254 }
2255}
2256
2257/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2258///
2259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi64&expand=3587)
2260#[inline]
2261#[target_feature(enable = "avx512f,avx512vl")]
2262#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2263#[cfg_attr(test, assert_instr(vpmaxsq))]
2264pub fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2265 unsafe {
2266 let max: i64x4 = _mm256_max_epi64(a, b).as_i64x4();
2267 transmute(src:simd_select_bitmask(m:k, yes:max, no:i64x4::ZERO))
2268 }
2269}
2270
2271/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2272///
2273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi64&expand=3585)
2274#[inline]
2275#[target_feature(enable = "avx512f,avx512vl")]
2276#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2277#[cfg_attr(test, assert_instr(vpmaxsq))]
2278pub fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
2279 unsafe {
2280 let a: i64x2 = a.as_i64x2();
2281 let b: i64x2 = b.as_i64x2();
2282 transmute(src:simd_select::<i64x2, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2283 }
2284}
2285
2286/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2287///
2288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi64&expand=3583)
2289#[inline]
2290#[target_feature(enable = "avx512f,avx512vl")]
2291#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2292#[cfg_attr(test, assert_instr(vpmaxsq))]
2293pub fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2294 unsafe {
2295 let max: i64x2 = _mm_max_epi64(a, b).as_i64x2();
2296 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i64x2()))
2297 }
2298}
2299
2300/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2301///
2302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi64&expand=3584)
2303#[inline]
2304#[target_feature(enable = "avx512f,avx512vl")]
2305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2306#[cfg_attr(test, assert_instr(vpmaxsq))]
2307pub fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2308 unsafe {
2309 let max: i64x2 = _mm_max_epi64(a, b).as_i64x2();
2310 transmute(src:simd_select_bitmask(m:k, yes:max, no:i64x2::ZERO))
2311 }
2312}
2313
2314/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
2315///
2316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_ps&expand=3655)
2317#[inline]
2318#[target_feature(enable = "avx512f")]
2319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2320#[cfg_attr(test, assert_instr(vmaxps))]
2321pub fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
2322 unsafe {
2323 transmute(src:vmaxps(
2324 a.as_f32x16(),
2325 b.as_f32x16(),
2326 _MM_FROUND_CUR_DIRECTION,
2327 ))
2328 }
2329}
2330
2331/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2332///
2333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_ps&expand=3653)
2334#[inline]
2335#[target_feature(enable = "avx512f")]
2336#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2337#[cfg_attr(test, assert_instr(vmaxps))]
2338pub fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2339 unsafe {
2340 let max: f32x16 = _mm512_max_ps(a, b).as_f32x16();
2341 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f32x16()))
2342 }
2343}
2344
2345/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2346///
2347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_ps&expand=3654)
2348#[inline]
2349#[target_feature(enable = "avx512f")]
2350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2351#[cfg_attr(test, assert_instr(vmaxps))]
2352pub fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
2353 unsafe {
2354 let max: f32x16 = _mm512_max_ps(a, b).as_f32x16();
2355 transmute(src:simd_select_bitmask(m:k, yes:max, no:f32x16::ZERO))
2356 }
2357}
2358
2359/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2360///
2361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_ps&expand=3650)
2362#[inline]
2363#[target_feature(enable = "avx512f,avx512vl")]
2364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2365#[cfg_attr(test, assert_instr(vmaxps))]
2366pub fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
2367 unsafe {
2368 let max: f32x8 = _mm256_max_ps(a, b).as_f32x8();
2369 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f32x8()))
2370 }
2371}
2372
2373/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2374///
2375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_ps&expand=3651)
2376#[inline]
2377#[target_feature(enable = "avx512f,avx512vl")]
2378#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2379#[cfg_attr(test, assert_instr(vmaxps))]
2380pub fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
2381 unsafe {
2382 let max: f32x8 = _mm256_max_ps(a, b).as_f32x8();
2383 transmute(src:simd_select_bitmask(m:k, yes:max, no:f32x8::ZERO))
2384 }
2385}
2386
2387/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2388///
2389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_ps&expand=3647)
2390#[inline]
2391#[target_feature(enable = "avx512f,avx512vl")]
2392#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2393#[cfg_attr(test, assert_instr(vmaxps))]
2394pub fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
2395 unsafe {
2396 let max: f32x4 = _mm_max_ps(a, b).as_f32x4();
2397 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f32x4()))
2398 }
2399}
2400
2401/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2402///
2403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_ps&expand=3648)
2404#[inline]
2405#[target_feature(enable = "avx512f,avx512vl")]
2406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2407#[cfg_attr(test, assert_instr(vmaxps))]
2408pub fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
2409 unsafe {
2410 let max: f32x4 = _mm_max_ps(a, b).as_f32x4();
2411 transmute(src:simd_select_bitmask(m:k, yes:max, no:f32x4::ZERO))
2412 }
2413}
2414
2415/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
2416///
2417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_pd&expand=3645)
2418#[inline]
2419#[target_feature(enable = "avx512f")]
2420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2421#[cfg_attr(test, assert_instr(vmaxpd))]
2422pub fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
2423 unsafe { transmute(src:vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
2424}
2425
2426/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2427///
2428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_pd&expand=3643)
2429#[inline]
2430#[target_feature(enable = "avx512f")]
2431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2432#[cfg_attr(test, assert_instr(vmaxpd))]
2433pub fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2434 unsafe {
2435 let max: f64x8 = _mm512_max_pd(a, b).as_f64x8();
2436 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f64x8()))
2437 }
2438}
2439
2440/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2441///
2442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_pd&expand=3644)
2443#[inline]
2444#[target_feature(enable = "avx512f")]
2445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2446#[cfg_attr(test, assert_instr(vmaxpd))]
2447pub fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2448 unsafe {
2449 let max: f64x8 = _mm512_max_pd(a, b).as_f64x8();
2450 transmute(src:simd_select_bitmask(m:k, yes:max, no:f64x8::ZERO))
2451 }
2452}
2453
2454/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2455///
2456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_pd&expand=3640)
2457#[inline]
2458#[target_feature(enable = "avx512f,avx512vl")]
2459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2460#[cfg_attr(test, assert_instr(vmaxpd))]
2461pub fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2462 unsafe {
2463 let max: f64x4 = _mm256_max_pd(a, b).as_f64x4();
2464 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f64x4()))
2465 }
2466}
2467
2468/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2469///
2470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_pd&expand=3641)
2471#[inline]
2472#[target_feature(enable = "avx512f,avx512vl")]
2473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2474#[cfg_attr(test, assert_instr(vmaxpd))]
2475pub fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2476 unsafe {
2477 let max: f64x4 = _mm256_max_pd(a, b).as_f64x4();
2478 transmute(src:simd_select_bitmask(m:k, yes:max, no:f64x4::ZERO))
2479 }
2480}
2481
2482/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2483///
2484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_pd&expand=3637)
2485#[inline]
2486#[target_feature(enable = "avx512f,avx512vl")]
2487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2488#[cfg_attr(test, assert_instr(vmaxpd))]
2489pub fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2490 unsafe {
2491 let max: f64x2 = _mm_max_pd(a, b).as_f64x2();
2492 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f64x2()))
2493 }
2494}
2495
2496/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2497///
2498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_pd&expand=3638)
2499#[inline]
2500#[target_feature(enable = "avx512f,avx512vl")]
2501#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2502#[cfg_attr(test, assert_instr(vmaxpd))]
2503pub fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2504 unsafe {
2505 let max: f64x2 = _mm_max_pd(a, b).as_f64x2();
2506 transmute(src:simd_select_bitmask(m:k, yes:max, no:f64x2::ZERO))
2507 }
2508}
2509
2510/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
2511///
2512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu32&expand=3618)
2513#[inline]
2514#[target_feature(enable = "avx512f")]
2515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2516#[cfg_attr(test, assert_instr(vpmaxud))]
2517pub fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
2518 unsafe {
2519 let a: u32x16 = a.as_u32x16();
2520 let b: u32x16 = b.as_u32x16();
2521 transmute(src:simd_select::<i32x16, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2522 }
2523}
2524
2525/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2526///
2527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu32&expand=3616)
2528#[inline]
2529#[target_feature(enable = "avx512f")]
2530#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2531#[cfg_attr(test, assert_instr(vpmaxud))]
2532pub fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2533 unsafe {
2534 let max: u32x16 = _mm512_max_epu32(a, b).as_u32x16();
2535 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u32x16()))
2536 }
2537}
2538
2539/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2540///
2541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu32&expand=3617)
2542#[inline]
2543#[target_feature(enable = "avx512f")]
2544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2545#[cfg_attr(test, assert_instr(vpmaxud))]
2546pub fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2547 unsafe {
2548 let max: u32x16 = _mm512_max_epu32(a, b).as_u32x16();
2549 transmute(src:simd_select_bitmask(m:k, yes:max, no:u32x16::ZERO))
2550 }
2551}
2552
2553/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2554///
2555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu32&expand=3613)
2556#[inline]
2557#[target_feature(enable = "avx512f,avx512vl")]
2558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2559#[cfg_attr(test, assert_instr(vpmaxud))]
2560pub fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2561 unsafe {
2562 let max: u32x8 = _mm256_max_epu32(a, b).as_u32x8();
2563 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u32x8()))
2564 }
2565}
2566
2567/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2568///
2569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu32&expand=3614)
2570#[inline]
2571#[target_feature(enable = "avx512f,avx512vl")]
2572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2573#[cfg_attr(test, assert_instr(vpmaxud))]
2574pub fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2575 unsafe {
2576 let max: u32x8 = _mm256_max_epu32(a, b).as_u32x8();
2577 transmute(src:simd_select_bitmask(m:k, yes:max, no:u32x8::ZERO))
2578 }
2579}
2580
2581/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2582///
2583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu32&expand=3610)
2584#[inline]
2585#[target_feature(enable = "avx512f,avx512vl")]
2586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2587#[cfg_attr(test, assert_instr(vpmaxud))]
2588pub fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2589 unsafe {
2590 let max: u32x4 = _mm_max_epu32(a, b).as_u32x4();
2591 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u32x4()))
2592 }
2593}
2594
2595/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2596///
2597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu32&expand=3611)
2598#[inline]
2599#[target_feature(enable = "avx512f,avx512vl")]
2600#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2601#[cfg_attr(test, assert_instr(vpmaxud))]
2602pub fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2603 unsafe {
2604 let max: u32x4 = _mm_max_epu32(a, b).as_u32x4();
2605 transmute(src:simd_select_bitmask(m:k, yes:max, no:u32x4::ZERO))
2606 }
2607}
2608
2609/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2610///
2611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu64&expand=3627)
2612#[inline]
2613#[target_feature(enable = "avx512f")]
2614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2615#[cfg_attr(test, assert_instr(vpmaxuq))]
2616pub fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
2617 unsafe {
2618 let a: u64x8 = a.as_u64x8();
2619 let b: u64x8 = b.as_u64x8();
2620 transmute(src:simd_select::<i64x8, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2621 }
2622}
2623
2624/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2625///
2626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu64&expand=3625)
2627#[inline]
2628#[target_feature(enable = "avx512f")]
2629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2630#[cfg_attr(test, assert_instr(vpmaxuq))]
2631pub fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2632 unsafe {
2633 let max: u64x8 = _mm512_max_epu64(a, b).as_u64x8();
2634 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u64x8()))
2635 }
2636}
2637
2638/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2639///
2640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu64&expand=3626)
2641#[inline]
2642#[target_feature(enable = "avx512f")]
2643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2644#[cfg_attr(test, assert_instr(vpmaxuq))]
2645pub fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2646 unsafe {
2647 let max: u64x8 = _mm512_max_epu64(a, b).as_u64x8();
2648 transmute(src:simd_select_bitmask(m:k, yes:max, no:u64x8::ZERO))
2649 }
2650}
2651
2652/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2653///
2654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu64&expand=3624)
2655#[inline]
2656#[target_feature(enable = "avx512f,avx512vl")]
2657#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2658#[cfg_attr(test, assert_instr(vpmaxuq))]
2659pub fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
2660 unsafe {
2661 let a: u64x4 = a.as_u64x4();
2662 let b: u64x4 = b.as_u64x4();
2663 transmute(src:simd_select::<i64x4, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2664 }
2665}
2666
2667/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2668///
2669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu64&expand=3622)
2670#[inline]
2671#[target_feature(enable = "avx512f,avx512vl")]
2672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2673#[cfg_attr(test, assert_instr(vpmaxuq))]
2674pub fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2675 unsafe {
2676 let max: u64x4 = _mm256_max_epu64(a, b).as_u64x4();
2677 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u64x4()))
2678 }
2679}
2680
2681/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2682///
2683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu64&expand=3623)
2684#[inline]
2685#[target_feature(enable = "avx512f,avx512vl")]
2686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2687#[cfg_attr(test, assert_instr(vpmaxuq))]
2688pub fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2689 unsafe {
2690 let max: u64x4 = _mm256_max_epu64(a, b).as_u64x4();
2691 transmute(src:simd_select_bitmask(m:k, yes:max, no:u64x4::ZERO))
2692 }
2693}
2694
2695/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2696///
2697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu64&expand=3621)
2698#[inline]
2699#[target_feature(enable = "avx512f,avx512vl")]
2700#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2701#[cfg_attr(test, assert_instr(vpmaxuq))]
2702pub fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
2703 unsafe {
2704 let a: u64x2 = a.as_u64x2();
2705 let b: u64x2 = b.as_u64x2();
2706 transmute(src:simd_select::<i64x2, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2707 }
2708}
2709
2710/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2711///
2712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu64&expand=3619)
2713#[inline]
2714#[target_feature(enable = "avx512f,avx512vl")]
2715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2716#[cfg_attr(test, assert_instr(vpmaxuq))]
2717pub fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2718 unsafe {
2719 let max: u64x2 = _mm_max_epu64(a, b).as_u64x2();
2720 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u64x2()))
2721 }
2722}
2723
2724/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2725///
2726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu64&expand=3620)
2727#[inline]
2728#[target_feature(enable = "avx512f,avx512vl")]
2729#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2730#[cfg_attr(test, assert_instr(vpmaxuq))]
2731pub fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2732 unsafe {
2733 let max: u64x2 = _mm_max_epu64(a, b).as_u64x2();
2734 transmute(src:simd_select_bitmask(m:k, yes:max, no:u64x2::ZERO))
2735 }
2736}
2737
2738/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
2739///
2740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi32&expand=3696)
2741#[inline]
2742#[target_feature(enable = "avx512f")]
2743#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2744#[cfg_attr(test, assert_instr(vpminsd))]
2745pub fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
2746 unsafe {
2747 let a: i32x16 = a.as_i32x16();
2748 let b: i32x16 = b.as_i32x16();
2749 transmute(src:simd_select::<i32x16, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2750 }
2751}
2752
2753/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2754///
2755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi32&expand=3694)
2756#[inline]
2757#[target_feature(enable = "avx512f")]
2758#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2759#[cfg_attr(test, assert_instr(vpminsd))]
2760pub fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2761 unsafe {
2762 let min: i32x16 = _mm512_min_epi32(a, b).as_i32x16();
2763 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i32x16()))
2764 }
2765}
2766
2767/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2768///
2769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi32&expand=3695)
2770#[inline]
2771#[target_feature(enable = "avx512f")]
2772#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2773#[cfg_attr(test, assert_instr(vpminsd))]
2774pub fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2775 unsafe {
2776 let min: i32x16 = _mm512_min_epi32(a, b).as_i32x16();
2777 transmute(src:simd_select_bitmask(m:k, yes:min, no:i32x16::ZERO))
2778 }
2779}
2780
2781/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2782///
2783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi32&expand=3691)
2784#[inline]
2785#[target_feature(enable = "avx512f,avx512vl")]
2786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2787#[cfg_attr(test, assert_instr(vpminsd))]
2788pub fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2789 unsafe {
2790 let min: i32x8 = _mm256_min_epi32(a, b).as_i32x8();
2791 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i32x8()))
2792 }
2793}
2794
2795/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2796///
2797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi32&expand=3692)
2798#[inline]
2799#[target_feature(enable = "avx512f,avx512vl")]
2800#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2801#[cfg_attr(test, assert_instr(vpminsd))]
2802pub fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2803 unsafe {
2804 let min: i32x8 = _mm256_min_epi32(a, b).as_i32x8();
2805 transmute(src:simd_select_bitmask(m:k, yes:min, no:i32x8::ZERO))
2806 }
2807}
2808
2809/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2810///
2811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi32&expand=3688)
2812#[inline]
2813#[target_feature(enable = "avx512f,avx512vl")]
2814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2815#[cfg_attr(test, assert_instr(vpminsd))]
2816pub fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2817 unsafe {
2818 let min: i32x4 = _mm_min_epi32(a, b).as_i32x4();
2819 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i32x4()))
2820 }
2821}
2822
2823/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2824///
2825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi32&expand=3689)
2826#[inline]
2827#[target_feature(enable = "avx512f,avx512vl")]
2828#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2829#[cfg_attr(test, assert_instr(vpminsd))]
2830pub fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2831 unsafe {
2832 let min: i32x4 = _mm_min_epi32(a, b).as_i32x4();
2833 transmute(src:simd_select_bitmask(m:k, yes:min, no:i32x4::ZERO))
2834 }
2835}
2836
2837/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2838///
2839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi64&expand=3705)
2840#[inline]
2841#[target_feature(enable = "avx512f")]
2842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2843#[cfg_attr(test, assert_instr(vpminsq))]
2844pub fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
2845 unsafe {
2846 let a: i64x8 = a.as_i64x8();
2847 let b: i64x8 = b.as_i64x8();
2848 transmute(src:simd_select::<i64x8, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2849 }
2850}
2851
2852/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2853///
2854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi64&expand=3703)
2855#[inline]
2856#[target_feature(enable = "avx512f")]
2857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2858#[cfg_attr(test, assert_instr(vpminsq))]
2859pub fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2860 unsafe {
2861 let min: i64x8 = _mm512_min_epi64(a, b).as_i64x8();
2862 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i64x8()))
2863 }
2864}
2865
2866/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2867///
2868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi64&expand=3704)
2869#[inline]
2870#[target_feature(enable = "avx512f")]
2871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2872#[cfg_attr(test, assert_instr(vpminsq))]
2873pub fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2874 unsafe {
2875 let min: i64x8 = _mm512_min_epi64(a, b).as_i64x8();
2876 transmute(src:simd_select_bitmask(m:k, yes:min, no:i64x8::ZERO))
2877 }
2878}
2879
2880/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2881///
2882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi64&expand=3702)
2883#[inline]
2884#[target_feature(enable = "avx512f,avx512vl")]
2885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2886#[cfg_attr(test, assert_instr(vpminsq))]
2887pub fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
2888 unsafe {
2889 let a: i64x4 = a.as_i64x4();
2890 let b: i64x4 = b.as_i64x4();
2891 transmute(src:simd_select::<i64x4, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2892 }
2893}
2894
2895/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2896///
2897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi64&expand=3700)
2898#[inline]
2899#[target_feature(enable = "avx512f,avx512vl")]
2900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2901#[cfg_attr(test, assert_instr(vpminsq))]
2902pub fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2903 unsafe {
2904 let min: i64x4 = _mm256_min_epi64(a, b).as_i64x4();
2905 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i64x4()))
2906 }
2907}
2908
2909/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2910///
2911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi64&expand=3701)
2912#[inline]
2913#[target_feature(enable = "avx512f,avx512vl")]
2914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2915#[cfg_attr(test, assert_instr(vpminsq))]
2916pub fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2917 unsafe {
2918 let min: i64x4 = _mm256_min_epi64(a, b).as_i64x4();
2919 transmute(src:simd_select_bitmask(m:k, yes:min, no:i64x4::ZERO))
2920 }
2921}
2922
2923/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2924///
2925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi64)
2926#[inline]
2927#[target_feature(enable = "avx512f,avx512vl")]
2928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2929#[cfg_attr(test, assert_instr(vpminsq))]
2930pub fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
2931 unsafe {
2932 let a: i64x2 = a.as_i64x2();
2933 let b: i64x2 = b.as_i64x2();
2934 transmute(src:simd_select::<i64x2, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2935 }
2936}
2937
2938/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2939///
2940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi64)
2941#[inline]
2942#[target_feature(enable = "avx512f,avx512vl")]
2943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2944#[cfg_attr(test, assert_instr(vpminsq))]
2945pub fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2946 unsafe {
2947 let min: i64x2 = _mm_min_epi64(a, b).as_i64x2();
2948 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i64x2()))
2949 }
2950}
2951
2952/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2953///
2954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi64)
2955#[inline]
2956#[target_feature(enable = "avx512f,avx512vl")]
2957#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2958#[cfg_attr(test, assert_instr(vpminsq))]
2959pub fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2960 unsafe {
2961 let min: i64x2 = _mm_min_epi64(a, b).as_i64x2();
2962 transmute(src:simd_select_bitmask(m:k, yes:min, no:i64x2::ZERO))
2963 }
2964}
2965
2966/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
2967///
2968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769)
2969#[inline]
2970#[target_feature(enable = "avx512f")]
2971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2972#[cfg_attr(test, assert_instr(vminps))]
2973pub fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
2974 unsafe {
2975 transmute(src:vminps(
2976 a.as_f32x16(),
2977 b.as_f32x16(),
2978 _MM_FROUND_CUR_DIRECTION,
2979 ))
2980 }
2981}
2982
2983/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2984///
2985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_ps&expand=3767)
2986#[inline]
2987#[target_feature(enable = "avx512f")]
2988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2989#[cfg_attr(test, assert_instr(vminps))]
2990pub fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2991 unsafe {
2992 let min: f32x16 = _mm512_min_ps(a, b).as_f32x16();
2993 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f32x16()))
2994 }
2995}
2996
2997/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2998///
2999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_ps&expand=3768)
3000#[inline]
3001#[target_feature(enable = "avx512f")]
3002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3003#[cfg_attr(test, assert_instr(vminps))]
3004pub fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
3005 unsafe {
3006 let min: f32x16 = _mm512_min_ps(a, b).as_f32x16();
3007 transmute(src:simd_select_bitmask(m:k, yes:min, no:f32x16::ZERO))
3008 }
3009}
3010
3011/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3012///
3013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_ps&expand=3764)
3014#[inline]
3015#[target_feature(enable = "avx512f,avx512vl")]
3016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3017#[cfg_attr(test, assert_instr(vminps))]
3018pub fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
3019 unsafe {
3020 let min: f32x8 = _mm256_min_ps(a, b).as_f32x8();
3021 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f32x8()))
3022 }
3023}
3024
3025/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3026///
3027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_ps&expand=3765)
3028#[inline]
3029#[target_feature(enable = "avx512f,avx512vl")]
3030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3031#[cfg_attr(test, assert_instr(vminps))]
3032pub fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
3033 unsafe {
3034 let min: f32x8 = _mm256_min_ps(a, b).as_f32x8();
3035 transmute(src:simd_select_bitmask(m:k, yes:min, no:f32x8::ZERO))
3036 }
3037}
3038
3039/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3040///
3041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_ps&expand=3761)
3042#[inline]
3043#[target_feature(enable = "avx512f,avx512vl")]
3044#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3045#[cfg_attr(test, assert_instr(vminps))]
3046pub fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
3047 unsafe {
3048 let min: f32x4 = _mm_min_ps(a, b).as_f32x4();
3049 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f32x4()))
3050 }
3051}
3052
3053/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3054///
3055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_ps&expand=3762)
3056#[inline]
3057#[target_feature(enable = "avx512f,avx512vl")]
3058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3059#[cfg_attr(test, assert_instr(vminps))]
3060pub fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
3061 unsafe {
3062 let min: f32x4 = _mm_min_ps(a, b).as_f32x4();
3063 transmute(src:simd_select_bitmask(m:k, yes:min, no:f32x4::ZERO))
3064 }
3065}
3066
3067/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
3068///
3069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_pd&expand=3759)
3070#[inline]
3071#[target_feature(enable = "avx512f")]
3072#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3073#[cfg_attr(test, assert_instr(vminpd))]
3074pub fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
3075 unsafe { transmute(src:vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
3076}
3077
3078/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3079///
3080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_pd&expand=3757)
3081#[inline]
3082#[target_feature(enable = "avx512f")]
3083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3084#[cfg_attr(test, assert_instr(vminpd))]
3085pub fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3086 unsafe {
3087 let min: f64x8 = _mm512_min_pd(a, b).as_f64x8();
3088 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f64x8()))
3089 }
3090}
3091
3092/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3093///
3094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_pd&expand=3758)
3095#[inline]
3096#[target_feature(enable = "avx512f")]
3097#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3098#[cfg_attr(test, assert_instr(vminpd))]
3099pub fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3100 unsafe {
3101 let min: f64x8 = _mm512_min_pd(a, b).as_f64x8();
3102 transmute(src:simd_select_bitmask(m:k, yes:min, no:f64x8::ZERO))
3103 }
3104}
3105
3106/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3107///
3108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_pd&expand=3754)
3109#[inline]
3110#[target_feature(enable = "avx512f,avx512vl")]
3111#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3112#[cfg_attr(test, assert_instr(vminpd))]
3113pub fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3114 unsafe {
3115 let min: f64x4 = _mm256_min_pd(a, b).as_f64x4();
3116 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f64x4()))
3117 }
3118}
3119
3120/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3121///
3122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_pd&expand=3755)
3123#[inline]
3124#[target_feature(enable = "avx512f,avx512vl")]
3125#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3126#[cfg_attr(test, assert_instr(vminpd))]
3127pub fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3128 unsafe {
3129 let min: f64x4 = _mm256_min_pd(a, b).as_f64x4();
3130 transmute(src:simd_select_bitmask(m:k, yes:min, no:f64x4::ZERO))
3131 }
3132}
3133
3134/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3135///
3136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_pd&expand=3751)
3137#[inline]
3138#[target_feature(enable = "avx512f,avx512vl")]
3139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3140#[cfg_attr(test, assert_instr(vminpd))]
3141pub fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3142 unsafe {
3143 let min: f64x2 = _mm_min_pd(a, b).as_f64x2();
3144 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f64x2()))
3145 }
3146}
3147
3148/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3149///
3150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_pd&expand=3752)
3151#[inline]
3152#[target_feature(enable = "avx512f,avx512vl")]
3153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3154#[cfg_attr(test, assert_instr(vminpd))]
3155pub fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3156 unsafe {
3157 let min: f64x2 = _mm_min_pd(a, b).as_f64x2();
3158 transmute(src:simd_select_bitmask(m:k, yes:min, no:f64x2::ZERO))
3159 }
3160}
3161
3162/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
3163///
3164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu32&expand=3732)
3165#[inline]
3166#[target_feature(enable = "avx512f")]
3167#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3168#[cfg_attr(test, assert_instr(vpminud))]
3169pub fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
3170 unsafe {
3171 let a: u32x16 = a.as_u32x16();
3172 let b: u32x16 = b.as_u32x16();
3173 transmute(src:simd_select::<i32x16, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
3174 }
3175}
3176
3177/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3178///
3179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu32&expand=3730)
3180#[inline]
3181#[target_feature(enable = "avx512f")]
3182#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3183#[cfg_attr(test, assert_instr(vpminud))]
3184pub fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3185 unsafe {
3186 let min: u32x16 = _mm512_min_epu32(a, b).as_u32x16();
3187 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_u32x16()))
3188 }
3189}
3190
3191/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3192///
3193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu32&expand=3731)
3194#[inline]
3195#[target_feature(enable = "avx512f")]
3196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3197#[cfg_attr(test, assert_instr(vpminud))]
3198pub fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3199 unsafe {
3200 let min: u32x16 = _mm512_min_epu32(a, b).as_u32x16();
3201 transmute(src:simd_select_bitmask(m:k, yes:min, no:u32x16::ZERO))
3202 }
3203}
3204
3205/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3206///
3207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu32&expand=3727)
3208#[inline]
3209#[target_feature(enable = "avx512f,avx512vl")]
3210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3211#[cfg_attr(test, assert_instr(vpminud))]
3212pub fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3213 unsafe {
3214 let min: u32x8 = _mm256_min_epu32(a, b).as_u32x8();
3215 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_u32x8()))
3216 }
3217}
3218
3219/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3220///
3221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu32&expand=3728)
3222#[inline]
3223#[target_feature(enable = "avx512f,avx512vl")]
3224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3225#[cfg_attr(test, assert_instr(vpminud))]
3226pub fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3227 unsafe {
3228 let min: u32x8 = _mm256_min_epu32(a, b).as_u32x8();
3229 transmute(src:simd_select_bitmask(m:k, yes:min, no:u32x8::ZERO))
3230 }
3231}
3232
3233/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3234///
3235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu32&expand=3724)
3236#[inline]
3237#[target_feature(enable = "avx512f,avx512vl")]
3238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3239#[cfg_attr(test, assert_instr(vpminud))]
3240pub fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3241 unsafe {
3242 let min: u32x4 = _mm_min_epu32(a, b).as_u32x4();
        transmute(simd_select_bitmask(k, min, src.as_u32x4()))
3244 }
3245}
3246
3247/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3248///
3249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu32&expand=3725)
3250#[inline]
3251#[target_feature(enable = "avx512f,avx512vl")]
3252#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3253#[cfg_attr(test, assert_instr(vpminud))]
3254pub fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3255 unsafe {
3256 let min: u32x4 = _mm_min_epu32(a, b).as_u32x4();
        transmute(simd_select_bitmask(k, min, u32x4::ZERO))
3258 }
3259}
3260
3261/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3262///
3263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu64&expand=3741)
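///
/// # Example
///
/// An illustrative sketch (hypothetical values; requires an AVX-512F-capable
/// CPU). The comparison is unsigned, so an all-ones lane is `u64::MAX`, not `-1`:
///
/// ```ignore
/// let a = _mm512_set1_epi64(-1); // all bits set, i.e. u64::MAX when viewed unsigned
/// let b = _mm512_set1_epi64(5);
/// // Every lane of `r` is 5, because 5 < u64::MAX in an unsigned comparison.
/// let r = _mm512_min_epu64(a, b);
/// ```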
3264#[inline]
3265#[target_feature(enable = "avx512f")]
3266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3267#[cfg_attr(test, assert_instr(vpminuq))]
3268pub fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
3269 unsafe {
3270 let a: u64x8 = a.as_u64x8();
3271 let b: u64x8 = b.as_u64x8();
        transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
3273 }
3274}
3275
3276/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3277///
3278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu64&expand=3739)
3279#[inline]
3280#[target_feature(enable = "avx512f")]
3281#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3282#[cfg_attr(test, assert_instr(vpminuq))]
3283pub fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3284 unsafe {
3285 let min: u64x8 = _mm512_min_epu64(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, min, src.as_u64x8()))
3287 }
3288}
3289
3290/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3291///
3292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu64&expand=3740)
3293#[inline]
3294#[target_feature(enable = "avx512f")]
3295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3296#[cfg_attr(test, assert_instr(vpminuq))]
3297pub fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3298 unsafe {
3299 let min: u64x8 = _mm512_min_epu64(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, min, u64x8::ZERO))
3301 }
3302}
3303
3304/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3305///
3306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu64&expand=3738)
3307#[inline]
3308#[target_feature(enable = "avx512f,avx512vl")]
3309#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3310#[cfg_attr(test, assert_instr(vpminuq))]
3311pub fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
3312 unsafe {
3313 let a: u64x4 = a.as_u64x4();
3314 let b: u64x4 = b.as_u64x4();
        transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
3316 }
3317}
3318
3319/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3320///
3321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu64&expand=3736)
3322#[inline]
3323#[target_feature(enable = "avx512f,avx512vl")]
3324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3325#[cfg_attr(test, assert_instr(vpminuq))]
3326pub fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3327 unsafe {
3328 let min: u64x4 = _mm256_min_epu64(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, min, src.as_u64x4()))
3330 }
3331}
3332
3333/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3334///
3335/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu64&expand=3737)
3336#[inline]
3337#[target_feature(enable = "avx512f,avx512vl")]
3338#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3339#[cfg_attr(test, assert_instr(vpminuq))]
3340pub fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3341 unsafe {
3342 let min: u64x4 = _mm256_min_epu64(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, min, u64x4::ZERO))
3344 }
3345}
3346
3347/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3348///
3349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu64&expand=3735)
3350#[inline]
3351#[target_feature(enable = "avx512f,avx512vl")]
3352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3353#[cfg_attr(test, assert_instr(vpminuq))]
3354pub fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
3355 unsafe {
3356 let a: u64x2 = a.as_u64x2();
3357 let b: u64x2 = b.as_u64x2();
        transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
3359 }
3360}
3361
3362/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3363///
3364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu64&expand=3733)
3365#[inline]
3366#[target_feature(enable = "avx512f,avx512vl")]
3367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3368#[cfg_attr(test, assert_instr(vpminuq))]
3369pub fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3370 unsafe {
3371 let min: u64x2 = _mm_min_epu64(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, min, src.as_u64x2()))
3373 }
3374}
3375
3376/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3377///
3378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu64&expand=3734)
3379#[inline]
3380#[target_feature(enable = "avx512f,avx512vl")]
3381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3382#[cfg_attr(test, assert_instr(vpminuq))]
3383pub fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3384 unsafe {
3385 let min: u64x2 = _mm_min_epu64(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, min, u64x2::ZERO))
3387 }
3388}
3389
3390/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
3391///
3392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_ps&expand=5371)
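///
/// # Example
///
/// A minimal sketch (hypothetical values; requires an AVX-512F-capable CPU):
///
/// ```ignore
/// let a = _mm512_set1_ps(9.0);
/// // Every lane of `r` is 3.0.
/// let r = _mm512_sqrt_ps(a);
/// ```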
3393#[inline]
3394#[target_feature(enable = "avx512f")]
3395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3396#[cfg_attr(test, assert_instr(vsqrtps))]
3397pub fn _mm512_sqrt_ps(a: __m512) -> __m512 {
3398 unsafe { simd_fsqrt(a) }
3399}
3400
3401/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3402///
3403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_ps&expand=5369)
3404#[inline]
3405#[target_feature(enable = "avx512f")]
3406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3407#[cfg_attr(test, assert_instr(vsqrtps))]
3408pub fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3410}
3411
3412/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3413///
3414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_ps&expand=5370)
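///
/// # Example
///
/// A minimal sketch of the zeromask behaviour (hypothetical values; requires an
/// AVX-512F-capable CPU):
///
/// ```ignore
/// let a = _mm512_set1_ps(16.0);
/// // Lanes 0..8 become 4.0; lanes 8..16 are zeroed because their mask bits are clear.
/// let r = _mm512_maskz_sqrt_ps(0x00FF, a);
/// ```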
3415#[inline]
3416#[target_feature(enable = "avx512f")]
3417#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3418#[cfg_attr(test, assert_instr(vsqrtps))]
3419pub fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_ps()) }
3421}
3422
3423/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3424///
3425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_ps&expand=5366)
3426#[inline]
3427#[target_feature(enable = "avx512f,avx512vl")]
3428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3429#[cfg_attr(test, assert_instr(vsqrtps))]
3430pub fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3432}
3433
3434/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3435///
3436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_ps&expand=5367)
3437#[inline]
3438#[target_feature(enable = "avx512f,avx512vl")]
3439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3440#[cfg_attr(test, assert_instr(vsqrtps))]
3441pub fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_ps()) }
3443}
3444
3445/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3446///
3447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_ps&expand=5363)
3448#[inline]
3449#[target_feature(enable = "avx512f,avx512vl")]
3450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3451#[cfg_attr(test, assert_instr(vsqrtps))]
3452pub fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3454}
3455
3456/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3457///
3458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_ps&expand=5364)
3459#[inline]
3460#[target_feature(enable = "avx512f,avx512vl")]
3461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3462#[cfg_attr(test, assert_instr(vsqrtps))]
3463pub fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_ps()) }
3465}
3466
3467/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
3468///
3469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_pd&expand=5362)
3470#[inline]
3471#[target_feature(enable = "avx512f")]
3472#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3473#[cfg_attr(test, assert_instr(vsqrtpd))]
3474pub fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
3475 unsafe { simd_fsqrt(a) }
3476}
3477
3478/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3479///
3480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_pd&expand=5360)
3481#[inline]
3482#[target_feature(enable = "avx512f")]
3483#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3484#[cfg_attr(test, assert_instr(vsqrtpd))]
3485pub fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3487}
3488
3489/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3490///
3491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_pd&expand=5361)
3492#[inline]
3493#[target_feature(enable = "avx512f")]
3494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3495#[cfg_attr(test, assert_instr(vsqrtpd))]
3496pub fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_pd()) }
3498}
3499
3500/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3501///
3502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_pd&expand=5357)
3503#[inline]
3504#[target_feature(enable = "avx512f,avx512vl")]
3505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3506#[cfg_attr(test, assert_instr(vsqrtpd))]
3507pub fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3509}
3510
3511/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3512///
3513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_pd&expand=5358)
3514#[inline]
3515#[target_feature(enable = "avx512f,avx512vl")]
3516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3517#[cfg_attr(test, assert_instr(vsqrtpd))]
3518pub fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_pd()) }
3520}
3521
3522/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3523///
3524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_pd&expand=5354)
3525#[inline]
3526#[target_feature(enable = "avx512f,avx512vl")]
3527#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3528#[cfg_attr(test, assert_instr(vsqrtpd))]
3529pub fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3531}
3532
3533/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3534///
3535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_pd&expand=5355)
3536#[inline]
3537#[target_feature(enable = "avx512f,avx512vl")]
3538#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3539#[cfg_attr(test, assert_instr(vsqrtpd))]
3540pub fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_pd()) }
3542}
3543
3544/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3545///
3546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_ps&expand=2557)
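///
/// # Example
///
/// A minimal sketch (hypothetical values; requires an AVX-512F-capable CPU).
/// Each lane computes `a * b + c` with a single rounding:
///
/// ```ignore
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(3.0);
/// let c = _mm512_set1_ps(1.0);
/// // Every lane of `r` is 2.0 * 3.0 + 1.0 == 7.0.
/// let r = _mm512_fmadd_ps(a, b, c);
/// ```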
3547#[inline]
3548#[target_feature(enable = "avx512f")]
3549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3550#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3551pub fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_fma(a, b, c) }
3553}
3554
3555/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3556///
3557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_ps&expand=2558)
3558#[inline]
3559#[target_feature(enable = "avx512f")]
3560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3561#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3562pub fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), a) }
3564}
3565
3566/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3567///
3568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_ps&expand=2560)
3569#[inline]
3570#[target_feature(enable = "avx512f")]
3571#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3572#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3573pub fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), _mm512_setzero_ps()) }
3575}
3576
3577/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3578///
3579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_ps&expand=2559)
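///
/// # Example
///
/// A minimal sketch (hypothetical values; requires an AVX-512F-capable CPU)
/// showing that the `mask3` form takes its fallback elements from `c`, not `a`:
///
/// ```ignore
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(3.0);
/// let c = _mm512_set1_ps(10.0);
/// // Lanes 0..8 become 2.0 * 3.0 + 10.0 == 16.0; lanes 8..16 keep 10.0 from `c`.
/// let r = _mm512_mask3_fmadd_ps(a, b, c, 0x00FF);
/// ```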
3580#[inline]
3581#[target_feature(enable = "avx512f")]
3582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3583#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3584pub fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), c) }
3586}
3587
3588/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3589///
3590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_ps&expand=2554)
3591#[inline]
3592#[target_feature(enable = "avx512f,avx512vl")]
3593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3594#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3595pub fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), a) }
3597}
3598
3599/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3600///
3601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_ps&expand=2556)
3602#[inline]
3603#[target_feature(enable = "avx512f,avx512vl")]
3604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3605#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3606pub fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), _mm256_setzero_ps()) }
3608}
3609
3610/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3611///
3612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_ps&expand=2555)
3613#[inline]
3614#[target_feature(enable = "avx512f,avx512vl")]
3615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3616#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3617pub fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), c) }
3619}
3620
3621/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3622///
3623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_ps&expand=2550)
3624#[inline]
3625#[target_feature(enable = "avx512f,avx512vl")]
3626#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3627#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3628pub fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), a) }
3630}
3631
3632/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3633///
3634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_ps&expand=2552)
3635#[inline]
3636#[target_feature(enable = "avx512f,avx512vl")]
3637#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3638#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3639pub fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), _mm_setzero_ps()) }
3641}
3642
3643/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3644///
3645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_ps&expand=2551)
3646#[inline]
3647#[target_feature(enable = "avx512f,avx512vl")]
3648#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3649#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3650pub fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), c) }
3652}
3653
3654/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3655///
3656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_pd&expand=2545)
3657#[inline]
3658#[target_feature(enable = "avx512f")]
3659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3660#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3661pub fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_fma(a, b, c) }
3663}
3664
3665/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3666///
3667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_pd&expand=2546)
3668#[inline]
3669#[target_feature(enable = "avx512f")]
3670#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3671#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3672pub fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), a) }
3674}
3675
3676/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3677///
3678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_pd&expand=2548)
3679#[inline]
3680#[target_feature(enable = "avx512f")]
3681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3682#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3683pub fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), _mm512_setzero_pd()) }
3685}
3686
3687/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3688///
3689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_pd&expand=2547)
3690#[inline]
3691#[target_feature(enable = "avx512f")]
3692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3693#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3694pub fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), c) }
3696}
3697
3698/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3699///
3700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_pd&expand=2542)
3701#[inline]
3702#[target_feature(enable = "avx512f,avx512vl")]
3703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3704#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3705pub fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), a) }
3707}
3708
3709/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3710///
3711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_pd&expand=2544)
3712#[inline]
3713#[target_feature(enable = "avx512f,avx512vl")]
3714#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3715#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3716pub fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), _mm256_setzero_pd()) }
3718}
3719
3720/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3721///
3722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_pd&expand=2543)
3723#[inline]
3724#[target_feature(enable = "avx512f,avx512vl")]
3725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3726#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3727pub fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), c) }
3729}
3730
3731/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3732///
3733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_pd&expand=2538)
3734#[inline]
3735#[target_feature(enable = "avx512f,avx512vl")]
3736#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3737#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3738pub fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), a) }
3740}
3741
3742/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3743///
3744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_pd&expand=2540)
3745#[inline]
3746#[target_feature(enable = "avx512f,avx512vl")]
3747#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3748#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3749pub fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), _mm_setzero_pd()) }
3751}
3752
3753/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3754///
3755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_pd&expand=2539)
3756#[inline]
3757#[target_feature(enable = "avx512f,avx512vl")]
3758#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3759#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3760pub fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), c) }
3762}
3763
3764/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3765///
3766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_ps&expand=2643)
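///
/// # Example
///
/// A minimal sketch (hypothetical values; requires an AVX-512F-capable CPU).
/// Each lane computes `a * b - c` with a single rounding:
///
/// ```ignore
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(3.0);
/// let c = _mm512_set1_ps(1.0);
/// // Every lane of `r` is 2.0 * 3.0 - 1.0 == 5.0.
/// let r = _mm512_fmsub_ps(a, b, c);
/// ```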
3767#[inline]
3768#[target_feature(enable = "avx512f")]
3769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3771pub fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_fma(a, b, simd_neg(c)) }
3773}
3774
3775/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3776///
3777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_ps&expand=2644)
3778#[inline]
3779#[target_feature(enable = "avx512f")]
3780#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3782pub fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), a) }
3784}
3785
3786/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3787///
3788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_ps&expand=2646)
3789#[inline]
3790#[target_feature(enable = "avx512f")]
3791#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3793pub fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), _mm512_setzero_ps()) }
3795}
3796
3797/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3798///
3799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_ps&expand=2645)
3800#[inline]
3801#[target_feature(enable = "avx512f")]
3802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3804pub fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), c) }
3806}
3807
3808/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3809///
3810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_ps&expand=2640)
3811#[inline]
3812#[target_feature(enable = "avx512f,avx512vl")]
3813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3815pub fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), a) }
3817}
3818
3819/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3820///
3821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_ps&expand=2642)
3822#[inline]
3823#[target_feature(enable = "avx512f,avx512vl")]
3824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3826pub fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), _mm256_setzero_ps()) }
3828}
3829
3830/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3831///
3832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_ps&expand=2641)
3833#[inline]
3834#[target_feature(enable = "avx512f,avx512vl")]
3835#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3837pub fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), c) }
3839}
3840
3841/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3842///
3843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_ps&expand=2636)
3844#[inline]
3845#[target_feature(enable = "avx512f,avx512vl")]
3846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3848pub fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), a) }
3850}
3851
3852/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3853///
3854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_ps&expand=2638)
3855#[inline]
3856#[target_feature(enable = "avx512f,avx512vl")]
3857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3859pub fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), _mm_setzero_ps()) }
3861}
3862
3863/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3864///
3865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_ps&expand=2637)
3866#[inline]
3867#[target_feature(enable = "avx512f,avx512vl")]
3868#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3870pub fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), c) }
3872}
3873
3874/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3875///
3876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_pd&expand=2631)
3877#[inline]
3878#[target_feature(enable = "avx512f")]
3879#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3880#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3881pub fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_fma(a, b, simd_neg(c)) }
3883}
3884
3885/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3886///
3887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_pd&expand=2632)
3888#[inline]
3889#[target_feature(enable = "avx512f")]
3890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3891#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3892pub fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), a) }
3894}
3895
3896/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3897///
3898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_pd&expand=2634)
3899#[inline]
3900#[target_feature(enable = "avx512f")]
3901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3902#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3903pub fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), _mm512_setzero_pd()) }
3905}
3906
3907/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3908///
3909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_pd&expand=2633)
3910#[inline]
3911#[target_feature(enable = "avx512f")]
3912#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3913#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3914pub fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), c) }
3916}
3917
3918/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3919///
3920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_pd&expand=2628)
3921#[inline]
3922#[target_feature(enable = "avx512f,avx512vl")]
3923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3924#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3925pub fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), a) }
3927}
3928
3929/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3930///
3931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_pd&expand=2630)
3932#[inline]
3933#[target_feature(enable = "avx512f,avx512vl")]
3934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3935#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3936pub fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), _mm256_setzero_pd()) }
3938}
3939
3940/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3941///
3942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_pd&expand=2629)
3943#[inline]
3944#[target_feature(enable = "avx512f,avx512vl")]
3945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3946#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3947pub fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), c) }
3949}
3950
3951/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3952///
3953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_pd&expand=2624)
3954#[inline]
3955#[target_feature(enable = "avx512f,avx512vl")]
3956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3957#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3958pub fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), a) }
3960}
3961
3962/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3963///
3964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_pd&expand=2626)
3965#[inline]
3966#[target_feature(enable = "avx512f,avx512vl")]
3967#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3968#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3969pub fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), _mm_setzero_pd()) }
3971}
3972
3973/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3974///
3975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_pd&expand=2625)
3976#[inline]
3977#[target_feature(enable = "avx512f,avx512vl")]
3978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3979#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3980pub fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), c) }
3982}
3983
3984/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
3985///
3986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_ps&expand=2611)
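///
/// # Example
///
/// A minimal sketch (hypothetical values; requires an AVX-512F-capable CPU).
/// Even-indexed lanes compute `a * b - c`, odd-indexed lanes compute `a * b + c`:
///
/// ```ignore
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(3.0);
/// let c = _mm512_set1_ps(1.0);
/// // Lanes 0, 2, 4, ... are 5.0 (subtract); lanes 1, 3, 5, ... are 7.0 (add).
/// let r = _mm512_fmaddsub_ps(a, b, c);
/// ```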
3987#[inline]
3988#[target_feature(enable = "avx512f")]
3989#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3990#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3991pub fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3992 unsafe {
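        // Compute both a*b+c and a*b-c for every lane, then interleave the two
        // results below: even-indexed lanes take the subtracted value, odd-indexed
        // lanes take the added value (shuffle indices >= 16 select from `sub`).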
        let add: __m512 = simd_fma(a, b, c);
        let sub: __m512 = simd_fma(a, b, simd_neg(c));
3995 simd_shuffle!(
3996 add,
3997 sub,
3998 [16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11, 28, 13, 30, 15]
3999 )
4000 }
4001}
4002
4003/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4004///
4005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_ps&expand=2612)
4006#[inline]
4007#[target_feature(enable = "avx512f")]
4008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4009#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4010pub fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), a) }
4012}
4013
4014/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4015///
4016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_ps&expand=2614)
4017#[inline]
4018#[target_feature(enable = "avx512f")]
4019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4020#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4021pub fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), _mm512_setzero_ps()) }
4023}
4024
4025/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4026///
4027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_ps&expand=2613)
4028#[inline]
4029#[target_feature(enable = "avx512f")]
4030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4031#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4032pub fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), c) }
4034}
4035
4036/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4037///
4038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_ps&expand=2608)
4039#[inline]
4040#[target_feature(enable = "avx512f,avx512vl")]
4041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4042#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4043pub fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), a) }
4045}
4046
4047/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4048///
4049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_ps&expand=2610)
4050#[inline]
4051#[target_feature(enable = "avx512f,avx512vl")]
4052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4053#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4054pub fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), _mm256_setzero_ps()) }
4056}
4057
4058/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4059///
4060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_ps&expand=2609)
4061#[inline]
4062#[target_feature(enable = "avx512f,avx512vl")]
4063#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4064#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4065pub fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), c) }
4067}
4068
4069/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4070///
4071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_ps&expand=2604)
4072#[inline]
4073#[target_feature(enable = "avx512f,avx512vl")]
4074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4075#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4076pub fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), a) }
4078}
4079
4080/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4081///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_ps&expand=2606)
4083#[inline]
4084#[target_feature(enable = "avx512f,avx512vl")]
4085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4086#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4087pub fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), _mm_setzero_ps()) }
4089}
4090
4091/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4092///
4093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_ps&expand=2605)
4094#[inline]
4095#[target_feature(enable = "avx512f,avx512vl")]
4096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4097#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4098pub fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), c) }
4100}
4101
4102/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
4103///
4104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_pd&expand=2599)
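///
/// # Examples
///
/// A minimal usage sketch added for illustration (not part of the original
/// documentation); it assumes an x86_64 CPU with AVX-512F available at
/// runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Assumes AVX-512F is available at runtime.
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     // Even lanes compute a*b - c, odd lanes compute a*b + c.
///     let r = _mm512_fmaddsub_pd(a, b, c);
///     let mut out = [0.0f64; 8];
///     _mm512_storeu_pd(out.as_mut_ptr(), r);
///     assert_eq!(out, [5.0, 7.0, 5.0, 7.0, 5.0, 7.0, 5.0, 7.0]);
/// }
/// ```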
4105#[inline]
4106#[target_feature(enable = "avx512f")]
4107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4108#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4109pub fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4110 unsafe {
        let add: __m512d = simd_fma(a, b, c);
        let sub: __m512d = simd_fma(a, b, simd_neg(c));
4113 simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
4114 }
4115}
4116
4117/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4118///
4119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_pd&expand=2600)
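///
/// # Examples
///
/// A hedged sketch of the writemask behaviour, added for illustration (not
/// part of the original documentation); it assumes AVX-512F is available at
/// runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Assumes AVX-512F is available at runtime.
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     // Mask bits 0, 2, 4 and 6 are set: those lanes take the fmaddsub
///     // result (even lanes compute a*b - c = 5.0); the other lanes copy `a`.
///     let r = _mm512_mask_fmaddsub_pd(a, 0b0101_0101, b, c);
///     let mut out = [0.0f64; 8];
///     _mm512_storeu_pd(out.as_mut_ptr(), r);
///     assert_eq!(out, [5.0, 2.0, 5.0, 2.0, 5.0, 2.0, 5.0, 2.0]);
/// }
/// ```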
4120#[inline]
4121#[target_feature(enable = "avx512f")]
4122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4123#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4124pub fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), a) }
4126}
4127
4128/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4129///
4130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_pd&expand=2602)
4131#[inline]
4132#[target_feature(enable = "avx512f")]
4133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4134#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4135pub fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), _mm512_setzero_pd()) }
4137}
4138
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4140///
4141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_pd&expand=2613)
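///
/// # Examples
///
/// A hedged sketch of the `mask3` behaviour, added for illustration (not part
/// of the original documentation); it assumes AVX-512F is available at
/// runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Assumes AVX-512F is available at runtime.
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     // The low 4 mask bits are set: lanes 0..4 take the fmaddsub result
///     // ([5.0, 7.0, 5.0, 7.0]); lanes 4..8 copy the corresponding lane of `c`.
///     let r = _mm512_mask3_fmaddsub_pd(a, b, c, 0b0000_1111);
///     let mut out = [0.0f64; 8];
///     _mm512_storeu_pd(out.as_mut_ptr(), r);
///     assert_eq!(out, [5.0, 7.0, 5.0, 7.0, 1.0, 1.0, 1.0, 1.0]);
/// }
/// ```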
4142#[inline]
4143#[target_feature(enable = "avx512f")]
4144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4145#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4146pub fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), c) }
4148}
4149
4150/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4151///
4152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_pd&expand=2596)
4153#[inline]
4154#[target_feature(enable = "avx512f,avx512vl")]
4155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4156#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4157pub fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), a) }
4159}
4160
4161/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4162///
4163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_pd&expand=2598)
4164#[inline]
4165#[target_feature(enable = "avx512f,avx512vl")]
4166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4167#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4168pub fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), _mm256_setzero_pd()) }
4170}
4171
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4173///
4174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_pd&expand=2597)
4175#[inline]
4176#[target_feature(enable = "avx512f,avx512vl")]
4177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4178#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4179pub fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), c) }
4181}
4182
4183/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4184///
4185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_pd&expand=2592)
4186#[inline]
4187#[target_feature(enable = "avx512f,avx512vl")]
4188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4189#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4190pub fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), a) }
4192}
4193
4194/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4195///
4196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_pd&expand=2594)
4197#[inline]
4198#[target_feature(enable = "avx512f,avx512vl")]
4199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4200#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4201pub fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), _mm_setzero_pd()) }
4203}
4204
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4206///
4207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_pd&expand=2593)
4208#[inline]
4209#[target_feature(enable = "avx512f,avx512vl")]
4210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4211#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4212pub fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), c) }
4214}
4215
4216/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4217///
4218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_ps&expand=2691)
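///
/// # Examples
///
/// A minimal usage sketch added for illustration (not part of the original
/// documentation); it assumes an x86_64 CPU with AVX-512F available at
/// runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Assumes AVX-512F is available at runtime.
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Even lanes compute a*b + c, odd lanes compute a*b - c.
///     let r = _mm512_fmsubadd_ps(a, b, c);
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     assert_eq!(out[0], 7.0);
///     assert_eq!(out[1], 5.0);
/// }
/// ```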
4219#[inline]
4220#[target_feature(enable = "avx512f")]
4221#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4222#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4223pub fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4224 unsafe {
        let add: __m512 = simd_fma(a, b, c);
        let sub: __m512 = simd_fma(a, b, simd_neg(c));
4227 simd_shuffle!(
4228 add,
4229 sub,
4230 [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31]
4231 )
4232 }
4233}
4234
4235/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4236///
4237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_ps&expand=2692)
4238#[inline]
4239#[target_feature(enable = "avx512f")]
4240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4241#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4242pub fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), a) }
4244}
4245
4246/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4247///
4248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_ps&expand=2694)
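///
/// # Examples
///
/// A hedged sketch of the zeromask behaviour, added for illustration (not
/// part of the original documentation); it assumes AVX-512F is available at
/// runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Assumes AVX-512F is available at runtime.
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Only the low 8 mask bits are set: lanes 0..8 take the fmsubadd
///     // result (lane 0 computes a*b + c = 7.0); lanes 8..16 are zeroed.
///     let r = _mm512_maskz_fmsubadd_ps(0x00FF, a, b, c);
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     assert_eq!(out[0], 7.0);
///     assert_eq!(out[15], 0.0);
/// }
/// ```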
4249#[inline]
4250#[target_feature(enable = "avx512f")]
4251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4252#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4253pub fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), _mm512_setzero_ps()) }
4255}
4256
4257/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4258///
4259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_ps&expand=2693)
4260#[inline]
4261#[target_feature(enable = "avx512f")]
4262#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4263#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4264pub fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), c) }
4266}
4267
4268/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4269///
4270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_ps&expand=2688)
4271#[inline]
4272#[target_feature(enable = "avx512f,avx512vl")]
4273#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4274#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4275pub fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), a) }
4277}
4278
4279/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4280///
4281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_ps&expand=2690)
4282#[inline]
4283#[target_feature(enable = "avx512f,avx512vl")]
4284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4285#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4286pub fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), _mm256_setzero_ps()) }
4288}
4289
4290/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4291///
4292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_ps&expand=2689)
4293#[inline]
4294#[target_feature(enable = "avx512f,avx512vl")]
4295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4296#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4297pub fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), c) }
4299}
4300
4301/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4302///
4303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_ps&expand=2684)
4304#[inline]
4305#[target_feature(enable = "avx512f,avx512vl")]
4306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4307#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4308pub fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), a) }
4310}
4311
4312/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4313///
4314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_ps&expand=2686)
4315#[inline]
4316#[target_feature(enable = "avx512f,avx512vl")]
4317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4318#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4319pub fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), _mm_setzero_ps()) }
4321}
4322
4323/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4324///
4325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_ps&expand=2685)
4326#[inline]
4327#[target_feature(enable = "avx512f,avx512vl")]
4328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4329#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4330pub fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), c) }
4332}
4333
4334/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4335///
4336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_pd&expand=2679)
4337#[inline]
4338#[target_feature(enable = "avx512f")]
4339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4340#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4341pub fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4342 unsafe {
        let add: __m512d = simd_fma(a, b, c);
        let sub: __m512d = simd_fma(a, b, simd_neg(c));
4345 simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
4346 }
4347}
4348
4349/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4350///
4351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_pd&expand=2680)
4352#[inline]
4353#[target_feature(enable = "avx512f")]
4354#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4355#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4356pub fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), a) }
4358}
4359
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4361///
4362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_pd&expand=2682)
4363#[inline]
4364#[target_feature(enable = "avx512f")]
4365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4366#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4367pub fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), _mm512_setzero_pd()) }
4369}
4370
4371/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4372///
4373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_pd&expand=2681)
4374#[inline]
4375#[target_feature(enable = "avx512f")]
4376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4377#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4378pub fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), c) }
4380}
4381
4382/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4383///
4384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_pd&expand=2676)
4385#[inline]
4386#[target_feature(enable = "avx512f,avx512vl")]
4387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4388#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4389pub fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), a) }
4391}
4392
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4394///
4395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_pd&expand=2678)
4396#[inline]
4397#[target_feature(enable = "avx512f,avx512vl")]
4398#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4399#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4400pub fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), _mm256_setzero_pd()) }
4402}
4403
4404/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4405///
4406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_pd&expand=2677)
4407#[inline]
4408#[target_feature(enable = "avx512f,avx512vl")]
4409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4410#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4411pub fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), c) }
4413}
4414
4415/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4416///
4417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_pd&expand=2672)
4418#[inline]
4419#[target_feature(enable = "avx512f,avx512vl")]
4420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4421#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4422pub fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), a) }
4424}
4425
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4427///
4428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_pd&expand=2674)
4429#[inline]
4430#[target_feature(enable = "avx512f,avx512vl")]
4431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4432#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4433pub fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), _mm_setzero_pd()) }
4435}
4436
4437/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4438///
4439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_pd&expand=2673)
4440#[inline]
4441#[target_feature(enable = "avx512f,avx512vl")]
4442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4443#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4444pub fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), c) }
4446}
4447
4448/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4449///
4450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_ps&expand=2723)
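///
/// # Examples
///
/// A minimal usage sketch added for illustration (not part of the original
/// documentation); it assumes an x86_64 CPU with AVX-512F available at
/// runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Assumes AVX-512F is available at runtime.
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Every lane computes -(a * b) + c = -5.0.
///     let r = _mm512_fnmadd_ps(a, b, c);
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     assert!(out.iter().all(|&x| x == -5.0));
/// }
/// ```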
4451#[inline]
4452#[target_feature(enable = "avx512f")]
4453#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4454#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4455pub fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_fma(simd_neg(a), b, c) }
4457}
4458
4459/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4460///
4461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_ps&expand=2724)
4462#[inline]
4463#[target_feature(enable = "avx512f")]
4464#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4465#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4466pub fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), a) }
4468}
4469
4470/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4471///
4472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_ps&expand=2726)
4473#[inline]
4474#[target_feature(enable = "avx512f")]
4475#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4476#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4477pub fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), _mm512_setzero_ps()) }
4479}
4480
4481/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4482///
4483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_ps&expand=2725)
4484#[inline]
4485#[target_feature(enable = "avx512f")]
4486#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4487#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4488pub fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), c) }
4490}
4491
4492/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4493///
4494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_ps&expand=2720)
4495#[inline]
4496#[target_feature(enable = "avx512f,avx512vl")]
4497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4498#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4499pub fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), a) }
4501}
4502
4503/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4504///
4505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_ps&expand=2722)
4506#[inline]
4507#[target_feature(enable = "avx512f,avx512vl")]
4508#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4509#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4510pub fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), _mm256_setzero_ps()) }
4512}
4513
4514/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4515///
4516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_ps&expand=2721)
4517#[inline]
4518#[target_feature(enable = "avx512f,avx512vl")]
4519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4520#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4521pub fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), c) }
4523}
4524
4525/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4526///
4527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_ps&expand=2716)
4528#[inline]
4529#[target_feature(enable = "avx512f,avx512vl")]
4530#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4531#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4532pub fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), a) }
4534}
4535
4536/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4537///
4538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_ps&expand=2718)
4539#[inline]
4540#[target_feature(enable = "avx512f,avx512vl")]
4541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4542#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4543pub fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), _mm_setzero_ps()) }
4545}
4546
4547/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4548///
4549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_ps&expand=2717)
4550#[inline]
4551#[target_feature(enable = "avx512f,avx512vl")]
4552#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4553#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4554pub fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), c) }
4556}
4557
4558/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4559///
4560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_pd&expand=2711)
4561#[inline]
4562#[target_feature(enable = "avx512f")]
4563#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4564#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4565pub fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_fma(simd_neg(a), b, c) }
4567}
4568
4569/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4570///
4571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_pd&expand=2712)
4572#[inline]
4573#[target_feature(enable = "avx512f")]
4574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4575#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4576pub fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), a) }
4578}
4579
4580/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4581///
4582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_pd&expand=2714)
4583#[inline]
4584#[target_feature(enable = "avx512f")]
4585#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4586#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4587pub fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), _mm512_setzero_pd()) }
4589}
4590
4591/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4592///
4593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_pd&expand=2713)
4594#[inline]
4595#[target_feature(enable = "avx512f")]
4596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4597#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4598pub fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), c) }
4600}
4601
4602/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4603///
4604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_pd&expand=2708)
4605#[inline]
4606#[target_feature(enable = "avx512f,avx512vl")]
4607#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4608#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4609pub fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), a) }
4611}
4612
4613/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4614///
4615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_pd&expand=2710)
4616#[inline]
4617#[target_feature(enable = "avx512f,avx512vl")]
4618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4619#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4620pub fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), _mm256_setzero_pd()) }
4622}
4623
4624/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4625///
4626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_pd&expand=2709)
4627#[inline]
4628#[target_feature(enable = "avx512f,avx512vl")]
4629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4630#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4631pub fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), c) }
4633}
4634
4635/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4636///
4637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_pd&expand=2704)
4638#[inline]
4639#[target_feature(enable = "avx512f,avx512vl")]
4640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4641#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4642pub fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), a) }
4644}
4645
4646/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4647///
4648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_pd&expand=2706)
4649#[inline]
4650#[target_feature(enable = "avx512f,avx512vl")]
4651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4652#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4653pub fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), _mm_setzero_pd()) }
4655}
4656
4657/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4658///
4659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_pd&expand=2705)
4660#[inline]
4661#[target_feature(enable = "avx512f,avx512vl")]
4662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4663#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4664pub fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), c) }
4666}
4667
4668/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4669///
4670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_ps&expand=2771)
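///
/// # Examples
///
/// A minimal usage sketch added for illustration (not part of the original
/// documentation); it assumes an x86_64 CPU with AVX-512F available at
/// runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Assumes AVX-512F is available at runtime.
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Every lane computes -(a * b) - c = -7.0.
///     let r = _mm512_fnmsub_ps(a, b, c);
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     assert!(out.iter().all(|&x| x == -7.0));
/// }
/// ```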
4671#[inline]
4672#[target_feature(enable = "avx512f")]
4673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4674#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4675pub fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
4677}
4678
4679/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4680///
4681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_ps&expand=2772)
4682#[inline]
4683#[target_feature(enable = "avx512f")]
4684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4685#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4686pub fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), a) }
4688}
4689
4690/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4691///
4692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_ps&expand=2774)
4693#[inline]
4694#[target_feature(enable = "avx512f")]
4695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4696#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4697pub fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), _mm512_setzero_ps()) }
4699}
4700
4701/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4702///
4703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_ps&expand=2773)
4704#[inline]
4705#[target_feature(enable = "avx512f")]
4706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4707#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4708pub fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), c) }
4710}
4711
4712/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4713///
4714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_ps&expand=2768)
4715#[inline]
4716#[target_feature(enable = "avx512f,avx512vl")]
4717#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4718#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4719pub fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), a) }
4721}
4722
4723/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4724///
4725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_ps&expand=2770)
4726#[inline]
4727#[target_feature(enable = "avx512f,avx512vl")]
4728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4729#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4730pub fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), _mm256_setzero_ps()) }
4732}
4733
4734/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4735///
4736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_ps&expand=2769)
4737#[inline]
4738#[target_feature(enable = "avx512f,avx512vl")]
4739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4740#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4741pub fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), c) }
4743}
4744
4745/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4746///
4747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_ps&expand=2764)
4748#[inline]
4749#[target_feature(enable = "avx512f,avx512vl")]
4750#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4751#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4752pub fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), a) }
4754}
4755
4756/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4757///
4758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_ps&expand=2766)
4759#[inline]
4760#[target_feature(enable = "avx512f,avx512vl")]
4761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4762#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4763pub fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), _mm_setzero_ps()) }
4765}
4766
4767/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4768///
4769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_ps&expand=2765)
4770#[inline]
4771#[target_feature(enable = "avx512f,avx512vl")]
4772#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4773#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4774pub fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), c) }
4776}
4777
4778/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4779///
4780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_pd&expand=2759)
4781#[inline]
4782#[target_feature(enable = "avx512f")]
4783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4784#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4785pub fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
4787}
4788
4789/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4790///
4791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_pd&expand=2760)
4792#[inline]
4793#[target_feature(enable = "avx512f")]
4794#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4795#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4796pub fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), a) }
4798}
4799
4800/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4801///
4802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_pd&expand=2762)
4803#[inline]
4804#[target_feature(enable = "avx512f")]
4805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4806#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4807pub fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), _mm512_setzero_pd()) }
4809}
4810
4811/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4812///
4813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_pd&expand=2761)
4814#[inline]
4815#[target_feature(enable = "avx512f")]
4816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4817#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4818pub fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), c) }
4820}
4821
4822/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4823///
4824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_pd&expand=2756)
4825#[inline]
4826#[target_feature(enable = "avx512f,avx512vl")]
4827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4828#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4829pub fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), a) }
4831}
4832
4833/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4834///
4835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_pd&expand=2758)
4836#[inline]
4837#[target_feature(enable = "avx512f,avx512vl")]
4838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4839#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4840pub fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), _mm256_setzero_pd()) }
4842}
4843
4844/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4845///
4846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_pd&expand=2757)
4847#[inline]
4848#[target_feature(enable = "avx512f,avx512vl")]
4849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4850#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4851pub fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), c) }
4853}
4854
4855/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4856///
4857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_pd&expand=2752)
4858#[inline]
4859#[target_feature(enable = "avx512f,avx512vl")]
4860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4861#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4862pub fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), a) }
4864}
4865
4866/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4867///
4868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_pd&expand=2754)
4869#[inline]
4870#[target_feature(enable = "avx512f,avx512vl")]
4871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4872#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4873pub fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), _mm_setzero_pd()) }
4875}
4876
4877/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4878///
4879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_pd&expand=2753)
4880#[inline]
4881#[target_feature(enable = "avx512f,avx512vl")]
4882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4883#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4884pub fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), c) }
4886}
4887
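// Usage sketch for the fnmsub family above (illustrative only, not part of this
// crate's tests). Each lane computes `-(a * b) - c`; the `mask`, `maskz` and `mask3`
// variants differ only in what they write to lanes whose mask bit is clear (`a`,
// zero, or `c` respectively). Assumes it runs in an `avx512f`-enabled context:
//
//     let a = _mm512_set1_pd(2.0);
//     let b = _mm512_set1_pd(3.0);
//     let c = _mm512_set1_pd(1.0);
//     // every selected lane becomes -(2.0 * 3.0) - 1.0 == -7.0
//     let merged = _mm512_mask_fnmsub_pd(a, 0b0000_1111, b, c); // unselected lanes keep `a`
//     let zeroed = _mm512_maskz_fnmsub_pd(0b0000_1111, a, b, c); // unselected lanes become 0.0
//     let from_c = _mm512_mask3_fnmsub_pd(a, b, c, 0b0000_1111); // unselected lanes keep `c`
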
4888/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4889///
4890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_ps&expand=4502)
4891#[inline]
4892#[target_feature(enable = "avx512f")]
4893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4894#[cfg_attr(test, assert_instr(vrcp14ps))]
4895pub fn _mm512_rcp14_ps(a: __m512) -> __m512 {
    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
4897}
4898
4899/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4900///
4901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_ps&expand=4500)
4902#[inline]
4903#[target_feature(enable = "avx512f")]
4904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4905#[cfg_attr(test, assert_instr(vrcp14ps))]
4906pub fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k)) }
4908}
4909
4910/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4911///
4912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_ps&expand=4501)
4913#[inline]
4914#[target_feature(enable = "avx512f")]
4915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4916#[cfg_attr(test, assert_instr(vrcp14ps))]
4917pub fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, k)) }
4919}
4920
4921/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4922///
4923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_ps&expand=4499)
4924#[inline]
4925#[target_feature(enable = "avx512f,avx512vl")]
4926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4927#[cfg_attr(test, assert_instr(vrcp14ps))]
4928pub fn _mm256_rcp14_ps(a: __m256) -> __m256 {
    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
4930}
4931
4932/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4933///
4934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_ps&expand=4497)
4935#[inline]
4936#[target_feature(enable = "avx512f,avx512vl")]
4937#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4938#[cfg_attr(test, assert_instr(vrcp14ps))]
4939pub fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vrcp14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
4941}
4942
4943/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4944///
4945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_ps&expand=4498)
4946#[inline]
4947#[target_feature(enable = "avx512f,avx512vl")]
4948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4949#[cfg_attr(test, assert_instr(vrcp14ps))]
4950pub fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
4952}
4953
4954/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4955///
4956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_ps&expand=4496)
4957#[inline]
4958#[target_feature(enable = "avx512f,avx512vl")]
4959#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4960#[cfg_attr(test, assert_instr(vrcp14ps))]
4961pub fn _mm_rcp14_ps(a: __m128) -> __m128 {
    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
4963}
4964
4965/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4966///
4967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_ps&expand=4494)
4968#[inline]
4969#[target_feature(enable = "avx512f,avx512vl")]
4970#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4971#[cfg_attr(test, assert_instr(vrcp14ps))]
4972pub fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vrcp14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
4974}
4975
4976/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4977///
4978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_ps&expand=4495)
4979#[inline]
4980#[target_feature(enable = "avx512f,avx512vl")]
4981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4982#[cfg_attr(test, assert_instr(vrcp14ps))]
4983pub fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
4985}
4986
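// Usage sketch for the rcp14 family above (illustrative only). `_mm512_rcp14_ps`
// returns an approximation of `1.0 / a` with relative error below 2^-14; when full
// single-precision accuracy is needed, a common follow-up (an add-on shown here as an
// assumption, not something these intrinsics do for you) is one Newton-Raphson step
// `y * (2 - a * y)`:
//
//     let a = _mm512_set1_ps(3.0);
//     let y = _mm512_rcp14_ps(a);                                   // ~0.333...
//     let two = _mm512_set1_ps(2.0);
//     let refined = _mm512_mul_ps(y, _mm512_fnmadd_ps(a, y, two));  // y * (2 - a*y)
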
4987/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4988///
4989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_pd&expand=4493)
4990#[inline]
4991#[target_feature(enable = "avx512f")]
4992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4993#[cfg_attr(test, assert_instr(vrcp14pd))]
4994pub fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
4996}
4997
4998/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4999///
5000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_pd&expand=4491)
5001#[inline]
5002#[target_feature(enable = "avx512f")]
5003#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5004#[cfg_attr(test, assert_instr(vrcp14pd))]
5005pub fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k)) }
5007}
5008
5009/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5010///
5011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_pd&expand=4492)
5012#[inline]
5013#[target_feature(enable = "avx512f")]
5014#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5015#[cfg_attr(test, assert_instr(vrcp14pd))]
5016pub fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, k)) }
5018}
5019
5020/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5021///
5022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_pd&expand=4490)
5023#[inline]
5024#[target_feature(enable = "avx512f,avx512vl")]
5025#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5026#[cfg_attr(test, assert_instr(vrcp14pd))]
5027pub fn _mm256_rcp14_pd(a: __m256d) -> __m256d {
    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5029}
5030
5031/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5032///
5033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_pd&expand=4488)
5034#[inline]
5035#[target_feature(enable = "avx512f,avx512vl")]
5036#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5037#[cfg_attr(test, assert_instr(vrcp14pd))]
5038pub fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vrcp14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
5040}
5041
5042/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5043///
5044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_pd&expand=4489)
5045#[inline]
5046#[target_feature(enable = "avx512f,avx512vl")]
5047#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5048#[cfg_attr(test, assert_instr(vrcp14pd))]
5049pub fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
5051}
5052
5053/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5054///
5055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_pd&expand=4487)
5056#[inline]
5057#[target_feature(enable = "avx512f,avx512vl")]
5058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5059#[cfg_attr(test, assert_instr(vrcp14pd))]
5060pub fn _mm_rcp14_pd(a: __m128d) -> __m128d {
    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5062}
5063
5064/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5065///
5066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_pd&expand=4485)
5067#[inline]
5068#[target_feature(enable = "avx512f,avx512vl")]
5069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5070#[cfg_attr(test, assert_instr(vrcp14pd))]
5071pub fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vrcp14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
5073}
5074
5075/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5076///
5077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_pd&expand=4486)
5078#[inline]
5079#[target_feature(enable = "avx512f,avx512vl")]
5080#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5081#[cfg_attr(test, assert_instr(vrcp14pd))]
5082pub fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
5084}
5085
5086/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5087///
5088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_ps&expand=4819)
5089#[inline]
5090#[target_feature(enable = "avx512f")]
5091#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5092#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5093pub fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
5095}
5096
5097/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5098///
5099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_ps&expand=4817)
5100#[inline]
5101#[target_feature(enable = "avx512f")]
5102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5103#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5104pub fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k)) }
5106}
5107
5108/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5109///
5110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_ps&expand=4818)
5111#[inline]
5112#[target_feature(enable = "avx512f")]
5113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5114#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5115pub fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, k)) }
5117}
5118
5119/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5120///
5121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_ps)
5122#[inline]
5123#[target_feature(enable = "avx512f,avx512vl")]
5124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5125#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5126pub fn _mm256_rsqrt14_ps(a: __m256) -> __m256 {
    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
5128}
5129
5130/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5131///
5132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_ps&expand=4815)
5133#[inline]
5134#[target_feature(enable = "avx512f,avx512vl")]
5135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5136#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5137pub fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
5139}
5140
5141/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5142///
5143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_ps&expand=4816)
5144#[inline]
5145#[target_feature(enable = "avx512f,avx512vl")]
5146#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5147#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5148pub fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
5150}
5151
5152/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5153///
5154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_ps)
5155#[inline]
5156#[target_feature(enable = "avx512f,avx512vl")]
5157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5158#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5159pub fn _mm_rsqrt14_ps(a: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
5161}
5162
5163/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5164///
5165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_ps&expand=4813)
5166#[inline]
5167#[target_feature(enable = "avx512f,avx512vl")]
5168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5169#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5170pub fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
5172}
5173
5174/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5175///
5176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_ps&expand=4814)
5177#[inline]
5178#[target_feature(enable = "avx512f,avx512vl")]
5179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5180#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5181pub fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
5183}
5184
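// Usage sketch for the rsqrt14 family above (illustrative only). `_mm512_rsqrt14_ps`
// approximates `1.0 / sqrt(a)` to better than 2^-14 relative error; a single
// Newton-Raphson step `y * (1.5 - 0.5 * a * y * y)` (again an add-on, not part of the
// intrinsic itself) roughly doubles the number of correct bits:
//
//     let a = _mm512_set1_ps(4.0);
//     let y = _mm512_rsqrt14_ps(a);                                    // ~0.5
//     let half_ay2 = _mm512_mul_ps(
//         _mm512_set1_ps(0.5),
//         _mm512_mul_ps(a, _mm512_mul_ps(y, y)),
//     );
//     let refined = _mm512_mul_ps(y, _mm512_sub_ps(_mm512_set1_ps(1.5), half_ay2));
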
5185/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5186///
5187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_pd&expand=4812)
5188#[inline]
5189#[target_feature(enable = "avx512f")]
5190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5191#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5192pub fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
5194}
5195
5196/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5197///
5198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_pd&expand=4810)
5199#[inline]
5200#[target_feature(enable = "avx512f")]
5201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5202#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5203pub fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k)) }
5205}
5206
5207/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5208///
5209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_pd&expand=4811)
5210#[inline]
5211#[target_feature(enable = "avx512f")]
5212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5213#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5214pub fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, k)) }
5216}
5217
5218/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5219///
5220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_pd)
5221#[inline]
5222#[target_feature(enable = "avx512f,avx512vl")]
5223#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5224#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5225pub fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d {
    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5227}
5228
5229/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5230///
5231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_pd&expand=4808)
5232#[inline]
5233#[target_feature(enable = "avx512f,avx512vl")]
5234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5235#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5236pub fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
5238}
5239
5240/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5241///
5242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_pd&expand=4809)
5243#[inline]
5244#[target_feature(enable = "avx512f,avx512vl")]
5245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5246#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5247pub fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
5249}
5250
5251/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5252///
5253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_pd)
5254#[inline]
5255#[target_feature(enable = "avx512f,avx512vl")]
5256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5257#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5258pub fn _mm_rsqrt14_pd(a: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5260}
5261
5262/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5263///
5264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_pd&expand=4806)
5265#[inline]
5266#[target_feature(enable = "avx512f,avx512vl")]
5267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5268#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5269pub fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
5271}
5272
5273/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5274///
5275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_pd&expand=4807)
5276#[inline]
5277#[target_feature(enable = "avx512f,avx512vl")]
5278#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5279#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5280pub fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
5282}
5283
5284/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5285///
5286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_ps&expand=2844)
5287#[inline]
5288#[target_feature(enable = "avx512f")]
5289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5290#[cfg_attr(test, assert_instr(vgetexpps))]
5291pub fn _mm512_getexp_ps(a: __m512) -> __m512 {
5292 unsafe {
        transmute(vgetexpps(
            a.as_f32x16(),
            f32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
5299 }
5300}
5301
5302/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5303///
5304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_ps&expand=2845)
5305#[inline]
5306#[target_feature(enable = "avx512f")]
5307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5308#[cfg_attr(test, assert_instr(vgetexpps))]
5309pub fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5310 unsafe {
        transmute(vgetexpps(
            a.as_f32x16(),
            src.as_f32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
5317 }
5318}
5319
5320/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5321///
5322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_ps&expand=2846)
5323#[inline]
5324#[target_feature(enable = "avx512f")]
5325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5326#[cfg_attr(test, assert_instr(vgetexpps))]
5327pub fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
5328 unsafe {
        transmute(vgetexpps(
            a.as_f32x16(),
            f32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
5335 }
5336}
5337
5338/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5339///
5340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_ps&expand=2841)
5341#[inline]
5342#[target_feature(enable = "avx512f,avx512vl")]
5343#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5344#[cfg_attr(test, assert_instr(vgetexpps))]
5345pub fn _mm256_getexp_ps(a: __m256) -> __m256 {
    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
5347}
5348
5349/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5350///
5351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_ps&expand=2842)
5352#[inline]
5353#[target_feature(enable = "avx512f,avx512vl")]
5354#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5355#[cfg_attr(test, assert_instr(vgetexpps))]
5356pub fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vgetexpps256(a.as_f32x8(), src.as_f32x8(), k)) }
5358}
5359
5360/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5361///
5362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_ps&expand=2843)
5363#[inline]
5364#[target_feature(enable = "avx512f,avx512vl")]
5365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5366#[cfg_attr(test, assert_instr(vgetexpps))]
5367pub fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, k)) }
5369}
5370
5371/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5372///
5373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_ps&expand=2838)
5374#[inline]
5375#[target_feature(enable = "avx512f,avx512vl")]
5376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5377#[cfg_attr(test, assert_instr(vgetexpps))]
5378pub fn _mm_getexp_ps(a: __m128) -> __m128 {
    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
5380}
5381
5382/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5383///
5384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_ps&expand=2839)
5385#[inline]
5386#[target_feature(enable = "avx512f,avx512vl")]
5387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5388#[cfg_attr(test, assert_instr(vgetexpps))]
5389pub fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vgetexpps128(a.as_f32x4(), src.as_f32x4(), k)) }
5391}
5392
5393/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5394///
5395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_ps&expand=2840)
5396#[inline]
5397#[target_feature(enable = "avx512f,avx512vl")]
5398#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5399#[cfg_attr(test, assert_instr(vgetexpps))]
5400pub fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, k)) }
5402}
5403
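// Usage sketch for the getexp family (illustrative only; the `_pd` variants below
// behave identically for doubles). Each result lane holds `floor(log2(|a|))` as a
// floating-point value, i.e. the unbiased exponent of the input:
//
//     let x = _mm512_set1_ps(8.0);
//     let e = _mm512_getexp_ps(x);   // every lane is 3.0, since 8.0 = 2^3
//     let y = _mm512_set1_ps(0.375);
//     let f = _mm512_getexp_ps(y);   // every lane is -2.0 (0.375 lies in [2^-2, 2^-1))
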
5404/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5405///
5406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_pd&expand=2835)
5407#[inline]
5408#[target_feature(enable = "avx512f")]
5409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5410#[cfg_attr(test, assert_instr(vgetexppd))]
5411pub fn _mm512_getexp_pd(a: __m512d) -> __m512d {
5412 unsafe {
        transmute(vgetexppd(
            a.as_f64x8(),
            f64x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
5419 }
5420}
5421
5422/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5423///
5424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_pd&expand=2836)
5425#[inline]
5426#[target_feature(enable = "avx512f")]
5427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5428#[cfg_attr(test, assert_instr(vgetexppd))]
5429pub fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5430 unsafe {
        transmute(vgetexppd(
            a.as_f64x8(),
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
5437 }
5438}
5439
5440/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5441///
5442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_pd&expand=2837)
5443#[inline]
5444#[target_feature(enable = "avx512f")]
5445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5446#[cfg_attr(test, assert_instr(vgetexppd))]
5447pub fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
5448 unsafe {
        transmute(vgetexppd(
            a.as_f64x8(),
            f64x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
5455 }
5456}
5457
5458/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5459///
5460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_pd&expand=2832)
5461#[inline]
5462#[target_feature(enable = "avx512f,avx512vl")]
5463#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5464#[cfg_attr(test, assert_instr(vgetexppd))]
5465pub fn _mm256_getexp_pd(a: __m256d) -> __m256d {
    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5467}
5468
5469/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5470///
5471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_pd&expand=2833)
5472#[inline]
5473#[target_feature(enable = "avx512f,avx512vl")]
5474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5475#[cfg_attr(test, assert_instr(vgetexppd))]
5476pub fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vgetexppd256(a.as_f64x4(), src.as_f64x4(), k)) }
5478}
5479
5480/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5481///
5482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_pd&expand=2834)
5483#[inline]
5484#[target_feature(enable = "avx512f,avx512vl")]
5485#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5486#[cfg_attr(test, assert_instr(vgetexppd))]
5487pub fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, k)) }
5489}
5490
5491/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5492///
5493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_pd&expand=2829)
5494#[inline]
5495#[target_feature(enable = "avx512f,avx512vl")]
5496#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5497#[cfg_attr(test, assert_instr(vgetexppd))]
5498pub fn _mm_getexp_pd(a: __m128d) -> __m128d {
    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5500}
5501
5502/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5503///
5504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_pd&expand=2830)
5505#[inline]
5506#[target_feature(enable = "avx512f,avx512vl")]
5507#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5508#[cfg_attr(test, assert_instr(vgetexppd))]
5509pub fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vgetexppd128(a.as_f64x2(), src.as_f64x2(), k)) }
5511}
5512
5513/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5514///
5515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_pd&expand=2831)
5516#[inline]
5517#[target_feature(enable = "avx512f,avx512vl")]
5518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5519#[cfg_attr(test, assert_instr(vgetexppd))]
5520pub fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, k)) }
5522}
5523
5524/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5525/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5526/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5527/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5528/// * [`_MM_FROUND_TO_POS_INF`] : round up
5529/// * [`_MM_FROUND_TO_ZERO`] : truncate
5530/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5531///
5532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_ps&expand=4784)
5533#[inline]
5534#[target_feature(enable = "avx512f")]
5535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5536#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5537#[rustc_legacy_const_generics(1)]
5538pub fn _mm512_roundscale_ps<const IMM8: i32>(a: __m512) -> __m512 {
5539 unsafe {
5540 static_assert_uimm_bits!(IMM8, 8);
5541 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vrndscaleps(
            a,
            IMM8,
            f32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
5550 }
5551}
5552
5553/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5554/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5555/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5556/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5557/// * [`_MM_FROUND_TO_POS_INF`] : round up
5558/// * [`_MM_FROUND_TO_ZERO`] : truncate
5559/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5560///
5561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_ps&expand=4782)
5562#[inline]
5563#[target_feature(enable = "avx512f")]
5564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5565#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5566#[rustc_legacy_const_generics(3)]
5567pub fn _mm512_mask_roundscale_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5568 unsafe {
5569 static_assert_uimm_bits!(IMM8, 8);
5570 let a: f32x16 = a.as_f32x16();
5571 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vrndscaleps(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
5574 }
5575}
5576
5577/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5578/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5579/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5580/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5581/// * [`_MM_FROUND_TO_POS_INF`] : round up
5582/// * [`_MM_FROUND_TO_ZERO`] : truncate
5583/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5584///
5585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_ps&expand=4783)
5586#[inline]
5587#[target_feature(enable = "avx512f")]
5588#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5589#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5590#[rustc_legacy_const_generics(2)]
5591pub fn _mm512_maskz_roundscale_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
5592 unsafe {
5593 static_assert_uimm_bits!(IMM8, 8);
5594 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vrndscaleps(a, IMM8, f32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
5597 }
5598}
5599
5600/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5601/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5602/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5603/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5604/// * [`_MM_FROUND_TO_POS_INF`] : round up
5605/// * [`_MM_FROUND_TO_ZERO`] : truncate
5606/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5607///
5608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_ps&expand=4781)
5609#[inline]
5610#[target_feature(enable = "avx512f,avx512vl")]
5611#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5612#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5613#[rustc_legacy_const_generics(1)]
5614pub fn _mm256_roundscale_ps<const IMM8: i32>(a: __m256) -> __m256 {
5615 unsafe {
5616 static_assert_uimm_bits!(IMM8, 8);
5617 let a: f32x8 = a.as_f32x8();
        let r: f32x8 = vrndscaleps256(a, IMM8, f32x8::ZERO, 0b11111111);
        transmute(r)
5620 }
5621}
5622
5623/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5624/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5625/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5626/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5627/// * [`_MM_FROUND_TO_POS_INF`] : round up
5628/// * [`_MM_FROUND_TO_ZERO`] : truncate
5629/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5630///
5631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_ps&expand=4779)
5632#[inline]
5633#[target_feature(enable = "avx512f,avx512vl")]
5634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5635#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5636#[rustc_legacy_const_generics(3)]
5637pub fn _mm256_mask_roundscale_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5638 unsafe {
5639 static_assert_uimm_bits!(IMM8, 8);
5640 let a: f32x8 = a.as_f32x8();
5641 let src: f32x8 = src.as_f32x8();
        let r: f32x8 = vrndscaleps256(a, IMM8, src, k);
        transmute(r)
5644 }
5645}
5646
5647/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5648/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5649/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5650/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5651/// * [`_MM_FROUND_TO_POS_INF`] : round up
5652/// * [`_MM_FROUND_TO_ZERO`] : truncate
5653/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5654///
5655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_ps&expand=4780)
5656#[inline]
5657#[target_feature(enable = "avx512f,avx512vl")]
5658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5659#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5660#[rustc_legacy_const_generics(2)]
5661pub fn _mm256_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
5662 unsafe {
5663 static_assert_uimm_bits!(IMM8, 8);
5664 let a: f32x8 = a.as_f32x8();
        let r: f32x8 = vrndscaleps256(a, IMM8, f32x8::ZERO, k);
        transmute(r)
5667 }
5668}
5669
5670/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5671/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5672/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5673/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5674/// * [`_MM_FROUND_TO_POS_INF`] : round up
5675/// * [`_MM_FROUND_TO_ZERO`] : truncate
5676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5677///
5678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_ps&expand=4778)
5679#[inline]
5680#[target_feature(enable = "avx512f,avx512vl")]
5681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5682#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5683#[rustc_legacy_const_generics(1)]
5684pub fn _mm_roundscale_ps<const IMM8: i32>(a: __m128) -> __m128 {
5685 unsafe {
5686 static_assert_uimm_bits!(IMM8, 8);
5687 let a: f32x4 = a.as_f32x4();
        let r: f32x4 = vrndscaleps128(a, IMM8, f32x4::ZERO, 0b00001111);
        transmute(r)
5690 }
5691}
5692
5693/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5694/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5695/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5696/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5697/// * [`_MM_FROUND_TO_POS_INF`] : round up
5698/// * [`_MM_FROUND_TO_ZERO`] : truncate
5699/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5700///
5701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_ps&expand=4776)
5702#[inline]
5703#[target_feature(enable = "avx512f,avx512vl")]
5704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5705#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5706#[rustc_legacy_const_generics(3)]
5707pub fn _mm_mask_roundscale_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5708 unsafe {
5709 static_assert_uimm_bits!(IMM8, 8);
5710 let a: f32x4 = a.as_f32x4();
5711 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vrndscaleps128(a, IMM8, src, k);
        transmute(r)
5714 }
5715}
5716
5717/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5718/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5719/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5720/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5721/// * [`_MM_FROUND_TO_POS_INF`] : round up
5722/// * [`_MM_FROUND_TO_ZERO`] : truncate
5723/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5724///
5725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_ps&expand=4777)
5726#[inline]
5727#[target_feature(enable = "avx512f,avx512vl")]
5728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5729#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5730#[rustc_legacy_const_generics(2)]
5731pub fn _mm_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
5732 unsafe {
5733 static_assert_uimm_bits!(IMM8, 8);
5734 let a: f32x4 = a.as_f32x4();
        let r: f32x4 = vrndscaleps128(a, IMM8, f32x4::ZERO, k);
        transmute(r)
5737 }
5738}
5739
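// Usage sketch for the single-precision roundscale family above (illustrative only).
// The low bits of IMM8 select the rounding mode listed in the doc comments; per
// Intel's documentation the high nibble selects how many fraction bits to keep
// (0 in these examples, i.e. rounding to whole numbers):
//
//     let x = _mm512_set1_ps(2.7);
//     let nearest = _mm512_roundscale_ps::<0>(x);                          // 3.0
//     let truncated = _mm512_roundscale_ps::<{ _MM_FROUND_TO_ZERO }>(x);   // 2.0
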
5740/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5741/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5742/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5743/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5744/// * [`_MM_FROUND_TO_POS_INF`] : round up
5745/// * [`_MM_FROUND_TO_ZERO`] : truncate
5746/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5747///
5748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_pd&expand=4775)
5749#[inline]
5750#[target_feature(enable = "avx512f")]
5751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5752#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5753#[rustc_legacy_const_generics(1)]
5754pub fn _mm512_roundscale_pd<const IMM8: i32>(a: __m512d) -> __m512d {
5755 unsafe {
5756 static_assert_uimm_bits!(IMM8, 8);
5757 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
5760 }
5761}
5762
5763/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5764/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5765/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5766/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5767/// * [`_MM_FROUND_TO_POS_INF`] : round up
5768/// * [`_MM_FROUND_TO_ZERO`] : truncate
5769/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5770///
5771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_pd&expand=4773)
5772#[inline]
5773#[target_feature(enable = "avx512f")]
5774#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5775#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5776#[rustc_legacy_const_generics(3)]
5777pub fn _mm512_mask_roundscale_pd<const IMM8: i32>(
5778 src: __m512d,
5779 k: __mmask8,
5780 a: __m512d,
5781) -> __m512d {
5782 unsafe {
5783 static_assert_uimm_bits!(IMM8, 8);
5784 let a: f64x8 = a.as_f64x8();
5785 let src: f64x8 = src.as_f64x8();
        let r: f64x8 = vrndscalepd(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
5788 }
5789}
5790
5791/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5792/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5793/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5794/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5795/// * [`_MM_FROUND_TO_POS_INF`] : round up
5796/// * [`_MM_FROUND_TO_ZERO`] : truncate
5797/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5798///
5799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_pd&expand=4774)
5800#[inline]
5801#[target_feature(enable = "avx512f")]
5802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5803#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5804#[rustc_legacy_const_generics(2)]
5805pub fn _mm512_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
5806 unsafe {
5807 static_assert_uimm_bits!(IMM8, 8);
5808 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vrndscalepd(a, IMM8, f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
5811 }
5812}
5813
5814/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5815/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5816/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5817/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5818/// * [`_MM_FROUND_TO_POS_INF`] : round up
5819/// * [`_MM_FROUND_TO_ZERO`] : truncate
5820/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5821///
5822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_pd&expand=4772)
5823#[inline]
5824#[target_feature(enable = "avx512f,avx512vl")]
5825#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5826#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5827#[rustc_legacy_const_generics(1)]
5828pub fn _mm256_roundscale_pd<const IMM8: i32>(a: __m256d) -> __m256d {
5829 unsafe {
5830 static_assert_uimm_bits!(IMM8, 8);
5831 let a: f64x4 = a.as_f64x4();
        let r: f64x4 = vrndscalepd256(a, IMM8, f64x4::ZERO, 0b00001111);
        transmute(r)
5834 }
5835}
5836
5837/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5838/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5839/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5840/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5841/// * [`_MM_FROUND_TO_POS_INF`] : round up
5842/// * [`_MM_FROUND_TO_ZERO`] : truncate
5843/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5844///
5845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_pd&expand=4770)
5846#[inline]
5847#[target_feature(enable = "avx512f,avx512vl")]
5848#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5849#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5850#[rustc_legacy_const_generics(3)]
5851pub fn _mm256_mask_roundscale_pd<const IMM8: i32>(
5852 src: __m256d,
5853 k: __mmask8,
5854 a: __m256d,
5855) -> __m256d {
5856 unsafe {
5857 static_assert_uimm_bits!(IMM8, 8);
5858 let a: f64x4 = a.as_f64x4();
5859 let src: f64x4 = src.as_f64x4();
        let r: f64x4 = vrndscalepd256(a, IMM8, src, k);
        transmute(r)
5862 }
5863}
5864
5865/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5866/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5867/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5868/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5869/// * [`_MM_FROUND_TO_POS_INF`] : round up
5870/// * [`_MM_FROUND_TO_ZERO`] : truncate
5871/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5872///
5873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_pd&expand=4771)
5874#[inline]
5875#[target_feature(enable = "avx512f,avx512vl")]
5876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5877#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5878#[rustc_legacy_const_generics(2)]
5879pub fn _mm256_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
5880 unsafe {
5881 static_assert_uimm_bits!(IMM8, 8);
5882 let a: f64x4 = a.as_f64x4();
        let r: f64x4 = vrndscalepd256(a, IMM8, f64x4::ZERO, k);
        transmute(r)
5885 }
5886}
5887
5888/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5889/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5890/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5891/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5892/// * [`_MM_FROUND_TO_POS_INF`] : round up
5893/// * [`_MM_FROUND_TO_ZERO`] : truncate
5894/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5895///
5896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_pd&expand=4769)
5897#[inline]
5898#[target_feature(enable = "avx512f,avx512vl")]
5899#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5900#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5901#[rustc_legacy_const_generics(1)]
5902pub fn _mm_roundscale_pd<const IMM8: i32>(a: __m128d) -> __m128d {
5903 unsafe {
5904 static_assert_uimm_bits!(IMM8, 8);
5905 let a: f64x2 = a.as_f64x2();
        let r: f64x2 = vrndscalepd128(a, IMM8, f64x2::ZERO, 0b00000011);
        transmute(r)
5908 }
5909}
5910
5911/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5912/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5913/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5914/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5915/// * [`_MM_FROUND_TO_POS_INF`] : round up
5916/// * [`_MM_FROUND_TO_ZERO`] : truncate
5917/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5918///
5919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_pd&expand=4767)
5920#[inline]
5921#[target_feature(enable = "avx512f,avx512vl")]
5922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5923#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5924#[rustc_legacy_const_generics(3)]
5925pub fn _mm_mask_roundscale_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5926 unsafe {
5927 static_assert_uimm_bits!(IMM8, 8);
5928 let a: f64x2 = a.as_f64x2();
5929 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vrndscalepd128(a, IMM8, src, k);
        transmute(r)
5932 }
5933}
5934
5935/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5936/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5937/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5938/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5939/// * [`_MM_FROUND_TO_POS_INF`] : round up
5940/// * [`_MM_FROUND_TO_ZERO`] : truncate
5941/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5942///
5943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_pd&expand=4768)
5944#[inline]
5945#[target_feature(enable = "avx512f,avx512vl")]
5946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5947#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5948#[rustc_legacy_const_generics(2)]
5949pub fn _mm_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
5950 unsafe {
5951 static_assert_uimm_bits!(IMM8, 8);
5952 let a: f64x2 = a.as_f64x2();
        let r: f64x2 = vrndscalepd128(a, IMM8, f64x2::ZERO, k);
        transmute(r)
5955 }
5956}
5957
5958/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
5959///
5960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_ps&expand=4883)
5961#[inline]
5962#[target_feature(enable = "avx512f")]
5963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5964#[cfg_attr(test, assert_instr(vscalefps))]
5965pub fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
5966 unsafe {
        transmute(vscalefps(
            a.as_f32x16(),
            b.as_f32x16(),
            f32x16::ZERO,
            0b11111111_11111111,
5972 _MM_FROUND_CUR_DIRECTION,
5973 ))
5974 }
5975}
5976
5977/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5978///
5979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_ps&expand=4881)
5980#[inline]
5981#[target_feature(enable = "avx512f")]
5982#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5983#[cfg_attr(test, assert_instr(vscalefps))]
5984pub fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
5985 unsafe {
        transmute(vscalefps(
            a.as_f32x16(),
            b.as_f32x16(),
            src.as_f32x16(),
            k,
5991 _MM_FROUND_CUR_DIRECTION,
5992 ))
5993 }
5994}
5995
5996/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5997///
5998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_ps&expand=4882)
5999#[inline]
6000#[target_feature(enable = "avx512f")]
6001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6002#[cfg_attr(test, assert_instr(vscalefps))]
6003pub fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
6004 unsafe {
        transmute(vscalefps(
            a.as_f32x16(),
            b.as_f32x16(),
            f32x16::ZERO,
            k,
6010 _MM_FROUND_CUR_DIRECTION,
6011 ))
6012 }
6013}
6014
6015/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6016///
6017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_ps&expand=4880)
6018#[inline]
6019#[target_feature(enable = "avx512f,avx512vl")]
6020#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6021#[cfg_attr(test, assert_instr(vscalefps))]
6022pub fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 {
6023 unsafe {
        transmute(vscalefps256(
            a.as_f32x8(),
            b.as_f32x8(),
            f32x8::ZERO,
            0b11111111,
6029 ))
6030 }
6031}
6032
6033/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6034///
6035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_ps&expand=4878)
6036#[inline]
6037#[target_feature(enable = "avx512f,avx512vl")]
6038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6039#[cfg_attr(test, assert_instr(vscalefps))]
6040pub fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), k)) }
6042}
6043
6044/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6045///
6046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_ps&expand=4879)
6047#[inline]
6048#[target_feature(enable = "avx512f,avx512vl")]
6049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6050#[cfg_attr(test, assert_instr(vscalefps))]
6051pub fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), f32x8::ZERO, k)) }
6053}
6054
6055/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6056///
6057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_ps&expand=4877)
6058#[inline]
6059#[target_feature(enable = "avx512f,avx512vl")]
6060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6061#[cfg_attr(test, assert_instr(vscalefps))]
6062pub fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 {
6063 unsafe {
        transmute(vscalefps128(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            0b00001111,
6069 ))
6070 }
6071}
6072
6073/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6074///
6075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_ps&expand=4875)
6076#[inline]
6077#[target_feature(enable = "avx512f,avx512vl")]
6078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6079#[cfg_attr(test, assert_instr(vscalefps))]
6080pub fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
6082}
6083
6084/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6085///
6086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_ps&expand=4876)
6087#[inline]
6088#[target_feature(enable = "avx512f,avx512vl")]
6089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6090#[cfg_attr(test, assert_instr(vscalefps))]
6091pub fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
6093}
6094
6095/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6096///
6097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_pd&expand=4874)
6098#[inline]
6099#[target_feature(enable = "avx512f")]
6100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6101#[cfg_attr(test, assert_instr(vscalefpd))]
6102pub fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
6103 unsafe {
        transmute(vscalefpd(
            a.as_f64x8(),
            b.as_f64x8(),
            f64x8::ZERO,
            0b11111111,
6109 _MM_FROUND_CUR_DIRECTION,
6110 ))
6111 }
6112}
6113
6114/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6115///
6116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_pd&expand=4872)
6117#[inline]
6118#[target_feature(enable = "avx512f")]
6119#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6120#[cfg_attr(test, assert_instr(vscalefpd))]
6121pub fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6122 unsafe {
        transmute(vscalefpd(
            a.as_f64x8(),
            b.as_f64x8(),
            src.as_f64x8(),
            k,
6128 _MM_FROUND_CUR_DIRECTION,
6129 ))
6130 }
6131}
6132
6133/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6134///
6135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_pd&expand=4873)
6136#[inline]
6137#[target_feature(enable = "avx512f")]
6138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6139#[cfg_attr(test, assert_instr(vscalefpd))]
6140pub fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6141 unsafe {
        transmute(vscalefpd(
            a.as_f64x8(),
            b.as_f64x8(),
            f64x8::ZERO,
            k,
6147 _MM_FROUND_CUR_DIRECTION,
6148 ))
6149 }
6150}
6151
6152/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6153///
6154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_pd&expand=4871)
6155#[inline]
6156#[target_feature(enable = "avx512f,avx512vl")]
6157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6158#[cfg_attr(test, assert_instr(vscalefpd))]
6159pub fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d {
6160 unsafe {
        transmute(vscalefpd256(
            a.as_f64x4(),
            b.as_f64x4(),
            f64x4::ZERO,
            0b00001111,
6166 ))
6167 }
6168}
6169
6170/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6171///
6172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_pd&expand=4869)
6173#[inline]
6174#[target_feature(enable = "avx512f,avx512vl")]
6175#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6176#[cfg_attr(test, assert_instr(vscalefpd))]
6177pub fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), k)) }
6179}
6180
6181/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6182///
6183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_pd&expand=4870)
6184#[inline]
6185#[target_feature(enable = "avx512f,avx512vl")]
6186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6187#[cfg_attr(test, assert_instr(vscalefpd))]
6188pub fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), f64x4::ZERO, k)) }
6190}
6191
6192/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6193///
6194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_pd&expand=4868)
6195#[inline]
6196#[target_feature(enable = "avx512f,avx512vl")]
6197#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6198#[cfg_attr(test, assert_instr(vscalefpd))]
6199pub fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d {
6200 unsafe {
        transmute(vscalefpd128(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            0b00000011,
6206 ))
6207 }
6208}
6209
6210/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6211///
6212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_pd&expand=4866)
6213#[inline]
6214#[target_feature(enable = "avx512f,avx512vl")]
6215#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6216#[cfg_attr(test, assert_instr(vscalefpd))]
6217pub fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
6219}
6220
6221/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6222///
6223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_pd&expand=4867)
6224#[inline]
6225#[target_feature(enable = "avx512f,avx512vl")]
6226#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6227#[cfg_attr(test, assert_instr(vscalefpd))]
6228pub fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
6230}
6231
6232/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6233///
6234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_ps&expand=2499)
6235#[inline]
6236#[target_feature(enable = "avx512f")]
6237#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6238#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6239#[rustc_legacy_const_generics(3)]
6240pub fn _mm512_fixupimm_ps<const IMM8: i32>(a: __m512, b: __m512, c: __m512i) -> __m512 {
6241 unsafe {
6242 static_assert_uimm_bits!(IMM8, 8);
6243 let a: f32x16 = a.as_f32x16();
6244 let b: f32x16 = b.as_f32x16();
6245 let c: i32x16 = c.as_i32x16();
        let r: f32x16 = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
6248 }
6249}
6250
6251/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6252///
6253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_ps&expand=2500)
6254#[inline]
6255#[target_feature(enable = "avx512f")]
6256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6257#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6258#[rustc_legacy_const_generics(4)]
6259pub fn _mm512_mask_fixupimm_ps<const IMM8: i32>(
6260 a: __m512,
6261 k: __mmask16,
6262 b: __m512,
6263 c: __m512i,
6264) -> __m512 {
6265 unsafe {
6266 static_assert_uimm_bits!(IMM8, 8);
6267 let a: f32x16 = a.as_f32x16();
6268 let b: f32x16 = b.as_f32x16();
6269 let c: i32x16 = c.as_i32x16();
        let r: f32x16 = vfixupimmps(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
6272 }
6273}
6274
6275/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6276///
6277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_ps&expand=2501)
6278#[inline]
6279#[target_feature(enable = "avx512f")]
6280#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6281#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6282#[rustc_legacy_const_generics(4)]
6283pub fn _mm512_maskz_fixupimm_ps<const IMM8: i32>(
6284 k: __mmask16,
6285 a: __m512,
6286 b: __m512,
6287 c: __m512i,
6288) -> __m512 {
6289 unsafe {
6290 static_assert_uimm_bits!(IMM8, 8);
6291 let a: f32x16 = a.as_f32x16();
6292 let b: f32x16 = b.as_f32x16();
6293 let c: i32x16 = c.as_i32x16();
        let r: f32x16 = vfixupimmpsz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
6296 }
6297}
6298
6299/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6300///
6301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_ps&expand=2496)
6302#[inline]
6303#[target_feature(enable = "avx512f,avx512vl")]
6304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6305#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6306#[rustc_legacy_const_generics(3)]
6307pub fn _mm256_fixupimm_ps<const IMM8: i32>(a: __m256, b: __m256, c: __m256i) -> __m256 {
6308 unsafe {
6309 static_assert_uimm_bits!(IMM8, 8);
6310 let a: f32x8 = a.as_f32x8();
6311 let b: f32x8 = b.as_f32x8();
6312 let c: i32x8 = c.as_i32x8();
        let r: f32x8 = vfixupimmps256(a, b, c, IMM8, 0b11111111);
        transmute(r)
6315 }
6316}
6317
6318/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6319///
6320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_ps&expand=2497)
6321#[inline]
6322#[target_feature(enable = "avx512f,avx512vl")]
6323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6324#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6325#[rustc_legacy_const_generics(4)]
6326pub fn _mm256_mask_fixupimm_ps<const IMM8: i32>(
6327 a: __m256,
6328 k: __mmask8,
6329 b: __m256,
6330 c: __m256i,
6331) -> __m256 {
6332 unsafe {
6333 static_assert_uimm_bits!(IMM8, 8);
6334 let a: f32x8 = a.as_f32x8();
6335 let b: f32x8 = b.as_f32x8();
6336 let c: i32x8 = c.as_i32x8();
        let r: f32x8 = vfixupimmps256(a, b, c, IMM8, k);
        transmute(r)
6339 }
6340}
6341
6342/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6343///
6344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_ps&expand=2498)
6345#[inline]
6346#[target_feature(enable = "avx512f,avx512vl")]
6347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6348#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6349#[rustc_legacy_const_generics(4)]
6350pub fn _mm256_maskz_fixupimm_ps<const IMM8: i32>(
6351 k: __mmask8,
6352 a: __m256,
6353 b: __m256,
6354 c: __m256i,
6355) -> __m256 {
6356 unsafe {
6357 static_assert_uimm_bits!(IMM8, 8);
6358 let a: f32x8 = a.as_f32x8();
6359 let b: f32x8 = b.as_f32x8();
6360 let c: i32x8 = c.as_i32x8();
        let r: f32x8 = vfixupimmpsz256(a, b, c, IMM8, k);
        transmute(r)
6363 }
6364}
6365
6366/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6367///
6368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_ps&expand=2493)
6369#[inline]
6370#[target_feature(enable = "avx512f,avx512vl")]
6371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6372#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6373#[rustc_legacy_const_generics(3)]
6374pub fn _mm_fixupimm_ps<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
6375 unsafe {
6376 static_assert_uimm_bits!(IMM8, 8);
6377 let a: f32x4 = a.as_f32x4();
6378 let b: f32x4 = b.as_f32x4();
6379 let c: i32x4 = c.as_i32x4();
        let r: f32x4 = vfixupimmps128(a, b, c, IMM8, 0b00001111);
        transmute(r)
6382 }
6383}
6384
6385/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6386///
6387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_ps&expand=2494)
6388#[inline]
6389#[target_feature(enable = "avx512f,avx512vl")]
6390#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6391#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6392#[rustc_legacy_const_generics(4)]
6393pub fn _mm_mask_fixupimm_ps<const IMM8: i32>(
6394 a: __m128,
6395 k: __mmask8,
6396 b: __m128,
6397 c: __m128i,
6398) -> __m128 {
6399 unsafe {
6400 static_assert_uimm_bits!(IMM8, 8);
6401 let a: f32x4 = a.as_f32x4();
6402 let b: f32x4 = b.as_f32x4();
6403 let c: i32x4 = c.as_i32x4();
        let r: f32x4 = vfixupimmps128(a, b, c, IMM8, k);
        transmute(r)
6406 }
6407}
6408
6409/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6410///
6411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_ps&expand=2495)
6412#[inline]
6413#[target_feature(enable = "avx512f,avx512vl")]
6414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6415#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6416#[rustc_legacy_const_generics(4)]
6417pub fn _mm_maskz_fixupimm_ps<const IMM8: i32>(
6418 k: __mmask8,
6419 a: __m128,
6420 b: __m128,
6421 c: __m128i,
6422) -> __m128 {
6423 unsafe {
6424 static_assert_uimm_bits!(IMM8, 8);
6425 let a: f32x4 = a.as_f32x4();
6426 let b: f32x4 = b.as_f32x4();
6427 let c: i32x4 = c.as_i32x4();
        let r: f32x4 = vfixupimmpsz128(a, b, c, IMM8, k);
        transmute(r)
6430 }
6431}
6432
6433/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6434///
6435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_pd&expand=2490)
6436#[inline]
6437#[target_feature(enable = "avx512f")]
6438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6439#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6440#[rustc_legacy_const_generics(3)]
6441pub fn _mm512_fixupimm_pd<const IMM8: i32>(a: __m512d, b: __m512d, c: __m512i) -> __m512d {
6442 unsafe {
6443 static_assert_uimm_bits!(IMM8, 8);
6444 let a: f64x8 = a.as_f64x8();
6445 let b: f64x8 = b.as_f64x8();
6446 let c: i64x8 = c.as_i64x8();
        let r: f64x8 = vfixupimmpd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
6449 }
6450}
6451
6452/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6453///
6454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_pd&expand=2491)
6455#[inline]
6456#[target_feature(enable = "avx512f")]
6457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6458#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6459#[rustc_legacy_const_generics(4)]
6460pub fn _mm512_mask_fixupimm_pd<const IMM8: i32>(
6461 a: __m512d,
6462 k: __mmask8,
6463 b: __m512d,
6464 c: __m512i,
6465) -> __m512d {
6466 unsafe {
6467 static_assert_uimm_bits!(IMM8, 8);
6468 let a: f64x8 = a.as_f64x8();
6469 let b: f64x8 = b.as_f64x8();
6470 let c: i64x8 = c.as_i64x8();
        let r: f64x8 = vfixupimmpd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
6473 }
6474}
6475
6476/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6477///
6478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_pd&expand=2492)
6479#[inline]
6480#[target_feature(enable = "avx512f")]
6481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6482#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6483#[rustc_legacy_const_generics(4)]
6484pub fn _mm512_maskz_fixupimm_pd<const IMM8: i32>(
6485 k: __mmask8,
6486 a: __m512d,
6487 b: __m512d,
6488 c: __m512i,
6489) -> __m512d {
6490 unsafe {
6491 static_assert_uimm_bits!(IMM8, 8);
6492 let a: f64x8 = a.as_f64x8();
6493 let b: f64x8 = b.as_f64x8();
6494 let c: i64x8 = c.as_i64x8();
        let r: f64x8 = vfixupimmpdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
6497 }
6498}
6499
6500/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6501///
6502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_pd&expand=2487)
6503#[inline]
6504#[target_feature(enable = "avx512f,avx512vl")]
6505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6506#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6507#[rustc_legacy_const_generics(3)]
6508pub fn _mm256_fixupimm_pd<const IMM8: i32>(a: __m256d, b: __m256d, c: __m256i) -> __m256d {
6509 unsafe {
6510 static_assert_uimm_bits!(IMM8, 8);
6511 let a: f64x4 = a.as_f64x4();
6512 let b: f64x4 = b.as_f64x4();
6513 let c: i64x4 = c.as_i64x4();
        let r: f64x4 = vfixupimmpd256(a, b, c, IMM8, 0b00001111);
        transmute(r)
6516 }
6517}
6518
6519/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6520///
6521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_pd&expand=2488)
6522#[inline]
6523#[target_feature(enable = "avx512f,avx512vl")]
6524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6525#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6526#[rustc_legacy_const_generics(4)]
6527pub fn _mm256_mask_fixupimm_pd<const IMM8: i32>(
6528 a: __m256d,
6529 k: __mmask8,
6530 b: __m256d,
6531 c: __m256i,
6532) -> __m256d {
6533 unsafe {
6534 static_assert_uimm_bits!(IMM8, 8);
6535 let a: f64x4 = a.as_f64x4();
6536 let b: f64x4 = b.as_f64x4();
6537 let c: i64x4 = c.as_i64x4();
        let r: f64x4 = vfixupimmpd256(a, b, c, IMM8, k);
        transmute(r)
6540 }
6541}
6542
6543/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6544///
6545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_pd&expand=2489)
6546#[inline]
6547#[target_feature(enable = "avx512f,avx512vl")]
6548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6549#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6550#[rustc_legacy_const_generics(4)]
6551pub fn _mm256_maskz_fixupimm_pd<const IMM8: i32>(
6552 k: __mmask8,
6553 a: __m256d,
6554 b: __m256d,
6555 c: __m256i,
6556) -> __m256d {
6557 unsafe {
6558 static_assert_uimm_bits!(IMM8, 8);
6559 let a: f64x4 = a.as_f64x4();
6560 let b: f64x4 = b.as_f64x4();
6561 let c: i64x4 = c.as_i64x4();
        let r: f64x4 = vfixupimmpdz256(a, b, c, IMM8, k);
        transmute(r)
6564 }
6565}
6566
6567/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6568///
6569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_pd&expand=2484)
6570#[inline]
6571#[target_feature(enable = "avx512f,avx512vl")]
6572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6573#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6574#[rustc_legacy_const_generics(3)]
6575pub fn _mm_fixupimm_pd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
6576 unsafe {
6577 static_assert_uimm_bits!(IMM8, 8);
6578 let a: f64x2 = a.as_f64x2();
6579 let b: f64x2 = b.as_f64x2();
6580 let c: i64x2 = c.as_i64x2();
        let r: f64x2 = vfixupimmpd128(a, b, c, IMM8, 0b00000011);
        transmute(r)
6583 }
6584}
6585
6586/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6587///
6588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_pd&expand=2485)
6589#[inline]
6590#[target_feature(enable = "avx512f,avx512vl")]
6591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6592#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6593#[rustc_legacy_const_generics(4)]
6594pub fn _mm_mask_fixupimm_pd<const IMM8: i32>(
6595 a: __m128d,
6596 k: __mmask8,
6597 b: __m128d,
6598 c: __m128i,
6599) -> __m128d {
6600 unsafe {
6601 static_assert_uimm_bits!(IMM8, 8);
6602 let a: f64x2 = a.as_f64x2();
6603 let b: f64x2 = b.as_f64x2();
6604 let c: i64x2 = c.as_i64x2();
        let r: f64x2 = vfixupimmpd128(a, b, c, IMM8, k);
        transmute(r)
6607 }
6608}
6609
6610/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6611///
6612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_pd&expand=2486)
6613#[inline]
6614#[target_feature(enable = "avx512f,avx512vl")]
6615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6616#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6617#[rustc_legacy_const_generics(4)]
6618pub fn _mm_maskz_fixupimm_pd<const IMM8: i32>(
6619 k: __mmask8,
6620 a: __m128d,
6621 b: __m128d,
6622 c: __m128i,
6623) -> __m128d {
6624 unsafe {
6625 static_assert_uimm_bits!(IMM8, 8);
6626 let a: f64x2 = a.as_f64x2();
6627 let b: f64x2 = b.as_f64x2();
6628 let c: i64x2 = c.as_i64x2();
        let r: f64x2 = vfixupimmpdz128(a, b, c, IMM8, k);
        transmute(r)
6631 }
6632}
6633
6634/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6635///
6636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi32&expand=5867)
6637#[inline]
6638#[target_feature(enable = "avx512f")]
6639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6640#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6641#[rustc_legacy_const_generics(3)]
6642pub fn _mm512_ternarylogic_epi32<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6643 unsafe {
6644 static_assert_uimm_bits!(IMM8, 8);
6645 let a: i32x16 = a.as_i32x16();
6646 let b: i32x16 = b.as_i32x16();
6647 let c: i32x16 = c.as_i32x16();
6648 let r: i32x16 = vpternlogd(a, b, c, IMM8);
        transmute(r)
6650 }
6651}
6652
6653/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6654///
6655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi32&expand=5865)
6656#[inline]
6657#[target_feature(enable = "avx512f")]
6658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6659#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6660#[rustc_legacy_const_generics(4)]
6661pub fn _mm512_mask_ternarylogic_epi32<const IMM8: i32>(
6662 src: __m512i,
6663 k: __mmask16,
6664 a: __m512i,
6665 b: __m512i,
6666) -> __m512i {
6667 unsafe {
6668 static_assert_uimm_bits!(IMM8, 8);
6669 let src: i32x16 = src.as_i32x16();
6670 let a: i32x16 = a.as_i32x16();
6671 let b: i32x16 = b.as_i32x16();
        let r: i32x16 = vpternlogd(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
6674 }
6675}
6676
6677/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6678///
6679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi32&expand=5866)
6680#[inline]
6681#[target_feature(enable = "avx512f")]
6682#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6683#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6684#[rustc_legacy_const_generics(4)]
6685pub fn _mm512_maskz_ternarylogic_epi32<const IMM8: i32>(
6686 k: __mmask16,
6687 a: __m512i,
6688 b: __m512i,
6689 c: __m512i,
6690) -> __m512i {
6691 unsafe {
6692 static_assert_uimm_bits!(IMM8, 8);
6693 let a: i32x16 = a.as_i32x16();
6694 let b: i32x16 = b.as_i32x16();
6695 let c: i32x16 = c.as_i32x16();
6696 let r: i32x16 = vpternlogd(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
6698 }
6699}
6700
6701/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6702///
6703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi32&expand=5864)
6704#[inline]
6705#[target_feature(enable = "avx512f,avx512vl")]
6706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6707#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6708#[rustc_legacy_const_generics(3)]
6709pub fn _mm256_ternarylogic_epi32<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6710 unsafe {
6711 static_assert_uimm_bits!(IMM8, 8);
6712 let a: i32x8 = a.as_i32x8();
6713 let b: i32x8 = b.as_i32x8();
6714 let c: i32x8 = c.as_i32x8();
6715 let r: i32x8 = vpternlogd256(a, b, c, IMM8);
        transmute(r)
6717 }
6718}
6719
6720/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6721///
6722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi32&expand=5862)
6723#[inline]
6724#[target_feature(enable = "avx512f,avx512vl")]
6725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6726#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6727#[rustc_legacy_const_generics(4)]
6728pub fn _mm256_mask_ternarylogic_epi32<const IMM8: i32>(
6729 src: __m256i,
6730 k: __mmask8,
6731 a: __m256i,
6732 b: __m256i,
6733) -> __m256i {
6734 unsafe {
6735 static_assert_uimm_bits!(IMM8, 8);
6736 let src: i32x8 = src.as_i32x8();
6737 let a: i32x8 = a.as_i32x8();
6738 let b: i32x8 = b.as_i32x8();
        let r: i32x8 = vpternlogd256(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
6741 }
6742}
6743
6744/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6745///
6746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi32&expand=5863)
6747#[inline]
6748#[target_feature(enable = "avx512f,avx512vl")]
6749#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6750#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6751#[rustc_legacy_const_generics(4)]
6752pub fn _mm256_maskz_ternarylogic_epi32<const IMM8: i32>(
6753 k: __mmask8,
6754 a: __m256i,
6755 b: __m256i,
6756 c: __m256i,
6757) -> __m256i {
6758 unsafe {
6759 static_assert_uimm_bits!(IMM8, 8);
6760 let a: i32x8 = a.as_i32x8();
6761 let b: i32x8 = b.as_i32x8();
6762 let c: i32x8 = c.as_i32x8();
6763 let r: i32x8 = vpternlogd256(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
6765 }
6766}
6767
6768/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6769///
6770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi32&expand=5861)
6771#[inline]
6772#[target_feature(enable = "avx512f,avx512vl")]
6773#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6774#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6775#[rustc_legacy_const_generics(3)]
6776pub fn _mm_ternarylogic_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
6777 unsafe {
6778 static_assert_uimm_bits!(IMM8, 8);
6779 let a: i32x4 = a.as_i32x4();
6780 let b: i32x4 = b.as_i32x4();
6781 let c: i32x4 = c.as_i32x4();
6782 let r: i32x4 = vpternlogd128(a, b, c, IMM8);
        transmute(r)
6784 }
6785}
6786
6787/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6788///
6789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi32&expand=5859)
6790#[inline]
6791#[target_feature(enable = "avx512f,avx512vl")]
6792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6793#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6794#[rustc_legacy_const_generics(4)]
6795pub fn _mm_mask_ternarylogic_epi32<const IMM8: i32>(
6796 src: __m128i,
6797 k: __mmask8,
6798 a: __m128i,
6799 b: __m128i,
6800) -> __m128i {
6801 unsafe {
6802 static_assert_uimm_bits!(IMM8, 8);
6803 let src: i32x4 = src.as_i32x4();
6804 let a: i32x4 = a.as_i32x4();
6805 let b: i32x4 = b.as_i32x4();
        let r: i32x4 = vpternlogd128(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
6808 }
6809}
6810
6811/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6812///
6813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi32&expand=5860)
6814#[inline]
6815#[target_feature(enable = "avx512f,avx512vl")]
6816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6817#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6818#[rustc_legacy_const_generics(4)]
6819pub fn _mm_maskz_ternarylogic_epi32<const IMM8: i32>(
6820 k: __mmask8,
6821 a: __m128i,
6822 b: __m128i,
6823 c: __m128i,
6824) -> __m128i {
6825 unsafe {
6826 static_assert_uimm_bits!(IMM8, 8);
6827 let a: i32x4 = a.as_i32x4();
6828 let b: i32x4 = b.as_i32x4();
6829 let c: i32x4 = c.as_i32x4();
6830 let r: i32x4 = vpternlogd128(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
6832 }
6833}
6834
6835/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6836///
6837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi64&expand=5876)
6838#[inline]
6839#[target_feature(enable = "avx512f")]
6840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6841#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6842#[rustc_legacy_const_generics(3)]
6843pub fn _mm512_ternarylogic_epi64<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6844 unsafe {
6845 static_assert_uimm_bits!(IMM8, 8);
6846 let a: i64x8 = a.as_i64x8();
6847 let b: i64x8 = b.as_i64x8();
6848 let c: i64x8 = c.as_i64x8();
6849 let r: i64x8 = vpternlogq(a, b, c, IMM8);
        transmute(r)
6851 }
6852}
6853
6854/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6855///
6856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi64&expand=5874)
6857#[inline]
6858#[target_feature(enable = "avx512f")]
6859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6860#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6861#[rustc_legacy_const_generics(4)]
6862pub fn _mm512_mask_ternarylogic_epi64<const IMM8: i32>(
6863 src: __m512i,
6864 k: __mmask8,
6865 a: __m512i,
6866 b: __m512i,
6867) -> __m512i {
6868 unsafe {
6869 static_assert_uimm_bits!(IMM8, 8);
6870 let src: i64x8 = src.as_i64x8();
6871 let a: i64x8 = a.as_i64x8();
6872 let b: i64x8 = b.as_i64x8();
        let r: i64x8 = vpternlogq(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
6875 }
6876}
6877
6878/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6879///
6880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi64&expand=5875)
6881#[inline]
6882#[target_feature(enable = "avx512f")]
6883#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6884#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6885#[rustc_legacy_const_generics(4)]
6886pub fn _mm512_maskz_ternarylogic_epi64<const IMM8: i32>(
6887 k: __mmask8,
6888 a: __m512i,
6889 b: __m512i,
6890 c: __m512i,
6891) -> __m512i {
6892 unsafe {
6893 static_assert_uimm_bits!(IMM8, 8);
6894 let a: i64x8 = a.as_i64x8();
6895 let b: i64x8 = b.as_i64x8();
6896 let c: i64x8 = c.as_i64x8();
6897 let r: i64x8 = vpternlogq(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
6899 }
6900}
6901
6902/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6903///
6904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi64&expand=5873)
6905#[inline]
6906#[target_feature(enable = "avx512f,avx512vl")]
6907#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6908#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6909#[rustc_legacy_const_generics(3)]
6910pub fn _mm256_ternarylogic_epi64<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6911 unsafe {
6912 static_assert_uimm_bits!(IMM8, 8);
6913 let a: i64x4 = a.as_i64x4();
6914 let b: i64x4 = b.as_i64x4();
6915 let c: i64x4 = c.as_i64x4();
6916 let r: i64x4 = vpternlogq256(a, b, c, IMM8);
        transmute(r)
6918 }
6919}
6920
6921/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6922///
6923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi64&expand=5871)
6924#[inline]
6925#[target_feature(enable = "avx512f,avx512vl")]
6926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6927#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6928#[rustc_legacy_const_generics(4)]
6929pub fn _mm256_mask_ternarylogic_epi64<const IMM8: i32>(
6930 src: __m256i,
6931 k: __mmask8,
6932 a: __m256i,
6933 b: __m256i,
6934) -> __m256i {
6935 unsafe {
6936 static_assert_uimm_bits!(IMM8, 8);
6937 let src: i64x4 = src.as_i64x4();
6938 let a: i64x4 = a.as_i64x4();
6939 let b: i64x4 = b.as_i64x4();
        let r: i64x4 = vpternlogq256(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
6942 }
6943}
6944
6945/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6946///
6947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi64&expand=5872)
6948#[inline]
6949#[target_feature(enable = "avx512f,avx512vl")]
6950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6951#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6952#[rustc_legacy_const_generics(4)]
6953pub fn _mm256_maskz_ternarylogic_epi64<const IMM8: i32>(
6954 k: __mmask8,
6955 a: __m256i,
6956 b: __m256i,
6957 c: __m256i,
6958) -> __m256i {
6959 unsafe {
6960 static_assert_uimm_bits!(IMM8, 8);
6961 let a: i64x4 = a.as_i64x4();
6962 let b: i64x4 = b.as_i64x4();
6963 let c: i64x4 = c.as_i64x4();
6964 let r: i64x4 = vpternlogq256(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
6966 }
6967}
6968
6969/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6970///
6971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi64&expand=5870)
6972#[inline]
6973#[target_feature(enable = "avx512f,avx512vl")]
6974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6975#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6976#[rustc_legacy_const_generics(3)]
6977pub fn _mm_ternarylogic_epi64<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
6978 unsafe {
6979 static_assert_uimm_bits!(IMM8, 8);
6980 let a: i64x2 = a.as_i64x2();
6981 let b: i64x2 = b.as_i64x2();
6982 let c: i64x2 = c.as_i64x2();
6983 let r: i64x2 = vpternlogq128(a, b, c, IMM8);
        transmute(r)
6985 }
6986}
6987
6988/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6989///
6990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi64&expand=5868)
6991#[inline]
6992#[target_feature(enable = "avx512f,avx512vl")]
6993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6994#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6995#[rustc_legacy_const_generics(4)]
6996pub fn _mm_mask_ternarylogic_epi64<const IMM8: i32>(
6997 src: __m128i,
6998 k: __mmask8,
6999 a: __m128i,
7000 b: __m128i,
7001) -> __m128i {
7002 unsafe {
7003 static_assert_uimm_bits!(IMM8, 8);
7004 let src: i64x2 = src.as_i64x2();
7005 let a: i64x2 = a.as_i64x2();
7006 let b: i64x2 = b.as_i64x2();
        let r: i64x2 = vpternlogq128(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
7009 }
7010}
7011
7012/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
7013///
7014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi64&expand=5869)
7015#[inline]
7016#[target_feature(enable = "avx512f,avx512vl")]
7017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7018#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7019#[rustc_legacy_const_generics(4)]
7020pub fn _mm_maskz_ternarylogic_epi64<const IMM8: i32>(
7021 k: __mmask8,
7022 a: __m128i,
7023 b: __m128i,
7024 c: __m128i,
7025) -> __m128i {
7026 unsafe {
7027 static_assert_uimm_bits!(IMM8, 8);
7028 let a: i64x2 = a.as_i64x2();
7029 let b: i64x2 = b.as_i64x2();
7030 let c: i64x2 = c.as_i64x2();
7031 let r: i64x2 = vpternlogq128(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
7033 }
7034}
7035
/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7046///
7047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_ps&expand=2880)
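///
/// As a plain-Rust scalar sketch (not this intrinsic; it ignores zeros, NaNs
/// and denormals, and the helper name is illustrative only), the
/// `_MM_MANT_NORM_1_2` / `_MM_MANT_SIGN_src` case keeps the sign and the
/// 23-bit fraction and forces the exponent back to the bias:
///
/// ```
/// // Remap |x| into [1.0, 2.0) while keeping the source sign.
/// fn mantissa_1_2_src_sign(x: f32) -> f32 {
///     f32::from_bits((x.to_bits() & 0x807f_ffff) | 0x3f80_0000)
/// }
/// assert_eq!(mantissa_1_2_src_sign(12.0), 1.5); // 12.0 = 1.5 * 2^3
/// assert_eq!(mantissa_1_2_src_sign(-0.375), -1.5); // -0.375 = -1.5 * 2^-2
/// ```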
7048#[inline]
7049#[target_feature(enable = "avx512f")]
7050#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7051#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7052#[rustc_legacy_const_generics(1, 2)]
7053pub fn _mm512_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7054 a: __m512,
7055) -> __m512 {
7056 unsafe {
7057 static_assert_uimm_bits!(NORM, 4);
7058 static_assert_uimm_bits!(SIGN, 2);
7059 let a: f32x16 = a.as_f32x16();
7060 let zero: f32x16 = f32x16::ZERO;
7061 let r: f32x16 = vgetmantps(
7062 a,
7063 SIGN << 2 | NORM,
            zero,
            0b11111111_11111111,
7066 _MM_FROUND_CUR_DIRECTION,
7067 );
        transmute(r)
7069 }
7070}
7071
7072/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7073/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7074/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7075/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7076/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7077/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7078/// The sign is determined by sc which can take the following values:\
7079/// _MM_MANT_SIGN_src // sign = sign(src)\
7080/// _MM_MANT_SIGN_zero // sign = 0\
7081/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7082///
7083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_ps&expand=2881)
7084#[inline]
7085#[target_feature(enable = "avx512f")]
7086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7087#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7088#[rustc_legacy_const_generics(3, 4)]
7089pub fn _mm512_mask_getmant_ps<
7090 const NORM: _MM_MANTISSA_NORM_ENUM,
7091 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7092>(
7093 src: __m512,
7094 k: __mmask16,
7095 a: __m512,
7096) -> __m512 {
7097 unsafe {
7098 static_assert_uimm_bits!(NORM, 4);
7099 static_assert_uimm_bits!(SIGN, 2);
7100 let a: f32x16 = a.as_f32x16();
7101 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
7104 }
7105}
7106
7107/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7108/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7109/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7110/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7111/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7112/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7113/// The sign is determined by sc which can take the following values:\
7114/// _MM_MANT_SIGN_src // sign = sign(src)\
7115/// _MM_MANT_SIGN_zero // sign = 0\
7116/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7117///
7118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_ps&expand=2882)
7119#[inline]
7120#[target_feature(enable = "avx512f")]
7121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7122#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7123#[rustc_legacy_const_generics(2, 3)]
7124pub fn _mm512_maskz_getmant_ps<
7125 const NORM: _MM_MANTISSA_NORM_ENUM,
7126 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7127>(
7128 k: __mmask16,
7129 a: __m512,
7130) -> __m512 {
7131 unsafe {
7132 static_assert_uimm_bits!(NORM, 4);
7133 static_assert_uimm_bits!(SIGN, 2);
7134 let a: f32x16 = a.as_f32x16();
7135 let r: f32x16 = vgetmantps(
7136 a,
7137 SIGN << 2 | NORM,
            f32x16::ZERO,
            k,
7140 _MM_FROUND_CUR_DIRECTION,
7141 );
        transmute(r)
7143 }
7144}
7145
/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7156///
7157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_ps&expand=2877)
7158#[inline]
7159#[target_feature(enable = "avx512f,avx512vl")]
7160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7161#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7162#[rustc_legacy_const_generics(1, 2)]
7163pub fn _mm256_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7164 a: __m256,
7165) -> __m256 {
7166 unsafe {
7167 static_assert_uimm_bits!(NORM, 4);
7168 static_assert_uimm_bits!(SIGN, 2);
7169 let a: f32x8 = a.as_f32x8();
        let r: f32x8 = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, 0b11111111);
        transmute(r)
7172 }
7173}
7174
7175/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7176/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7177/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7178/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7179/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7180/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7181/// The sign is determined by sc which can take the following values:\
7182/// _MM_MANT_SIGN_src // sign = sign(src)\
7183/// _MM_MANT_SIGN_zero // sign = 0\
7184/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7185///
7186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_ps&expand=2878)
7187#[inline]
7188#[target_feature(enable = "avx512f,avx512vl")]
7189#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7190#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7191#[rustc_legacy_const_generics(3, 4)]
7192pub fn _mm256_mask_getmant_ps<
7193 const NORM: _MM_MANTISSA_NORM_ENUM,
7194 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7195>(
7196 src: __m256,
7197 k: __mmask8,
7198 a: __m256,
7199) -> __m256 {
7200 unsafe {
7201 static_assert_uimm_bits!(NORM, 4);
7202 static_assert_uimm_bits!(SIGN, 2);
7203 let a: f32x8 = a.as_f32x8();
7204 let src: f32x8 = src.as_f32x8();
        let r: f32x8 = vgetmantps256(a, SIGN << 2 | NORM, src, k);
        transmute(r)
7207 }
7208}
7209
7210/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7211/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7212/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7213/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7214/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7215/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7216/// The sign is determined by sc which can take the following values:\
7217/// _MM_MANT_SIGN_src // sign = sign(src)\
7218/// _MM_MANT_SIGN_zero // sign = 0\
7219/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7220///
7221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_ps&expand=2879)
7222#[inline]
7223#[target_feature(enable = "avx512f,avx512vl")]
7224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7225#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7226#[rustc_legacy_const_generics(2, 3)]
7227pub fn _mm256_maskz_getmant_ps<
7228 const NORM: _MM_MANTISSA_NORM_ENUM,
7229 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7230>(
7231 k: __mmask8,
7232 a: __m256,
7233) -> __m256 {
7234 unsafe {
7235 static_assert_uimm_bits!(NORM, 4);
7236 static_assert_uimm_bits!(SIGN, 2);
7237 let a: f32x8 = a.as_f32x8();
        let r: f32x8 = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, k);
        transmute(r)
7240 }
7241}
7242
/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7253///
7254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_ps&expand=2874)
7255#[inline]
7256#[target_feature(enable = "avx512f,avx512vl")]
7257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7258#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7259#[rustc_legacy_const_generics(1, 2)]
7260pub fn _mm_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7261 a: __m128,
7262) -> __m128 {
7263 unsafe {
7264 static_assert_uimm_bits!(NORM, 4);
7265 static_assert_uimm_bits!(SIGN, 2);
7266 let a: f32x4 = a.as_f32x4();
        let r: f32x4 = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, 0b00001111);
        transmute(r)
7269 }
7270}
7271
7272/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7273/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7274/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7275/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7276/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7277/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7278/// The sign is determined by sc which can take the following values:\
7279/// _MM_MANT_SIGN_src // sign = sign(src)\
7280/// _MM_MANT_SIGN_zero // sign = 0\
7281/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7282///
7283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_ps&expand=2875)
7284#[inline]
7285#[target_feature(enable = "avx512f,avx512vl")]
7286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7287#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7288#[rustc_legacy_const_generics(3, 4)]
7289pub fn _mm_mask_getmant_ps<
7290 const NORM: _MM_MANTISSA_NORM_ENUM,
7291 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7292>(
7293 src: __m128,
7294 k: __mmask8,
7295 a: __m128,
7296) -> __m128 {
7297 unsafe {
7298 static_assert_uimm_bits!(NORM, 4);
7299 static_assert_uimm_bits!(SIGN, 2);
7300 let a: f32x4 = a.as_f32x4();
7301 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vgetmantps128(a, SIGN << 2 | NORM, src, k);
        transmute(r)
7304 }
7305}
7306
7307/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7308/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7309/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7310/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7311/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7312/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7313/// The sign is determined by sc which can take the following values:\
7314/// _MM_MANT_SIGN_src // sign = sign(src)\
7315/// _MM_MANT_SIGN_zero // sign = 0\
7316/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7317///
7318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_ps&expand=2876)
7319#[inline]
7320#[target_feature(enable = "avx512f,avx512vl")]
7321#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7322#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7323#[rustc_legacy_const_generics(2, 3)]
7324pub fn _mm_maskz_getmant_ps<
7325 const NORM: _MM_MANTISSA_NORM_ENUM,
7326 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7327>(
7328 k: __mmask8,
7329 a: __m128,
7330) -> __m128 {
7331 unsafe {
7332 static_assert_uimm_bits!(NORM, 4);
7333 static_assert_uimm_bits!(SIGN, 2);
7334 let a: f32x4 = a.as_f32x4();
        let r: f32x4 = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, k);
        transmute(r)
7337 }
7338}
7339
7340/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7341/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7342/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7343/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7344/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7345/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7346/// The sign is determined by sc which can take the following values:\
7347/// _MM_MANT_SIGN_src // sign = sign(src)\
7348/// _MM_MANT_SIGN_zero // sign = 0\
7349/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7350///
7351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_pd&expand=2871)
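///
/// As a plain-Rust scalar sketch (not this intrinsic; it ignores zeros, NaNs
/// and denormals, and the helper name is illustrative only), the
/// `_MM_MANT_NORM_p5_1` / `_MM_MANT_SIGN_src` case keeps the sign and the
/// 52-bit fraction and forces the exponent to one below the bias:
///
/// ```
/// // Remap |x| into [0.5, 1.0) while keeping the source sign.
/// fn mantissa_p5_1_src_sign(x: f64) -> f64 {
///     f64::from_bits((x.to_bits() & 0x800f_ffff_ffff_ffff) | 0x3fe0_0000_0000_0000)
/// }
/// assert_eq!(mantissa_p5_1_src_sign(12.0), 0.75); // 12.0 = 0.75 * 2^4
/// ```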
7352#[inline]
7353#[target_feature(enable = "avx512f")]
7354#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7355#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7356#[rustc_legacy_const_generics(1, 2)]
7357pub fn _mm512_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7358 a: __m512d,
7359) -> __m512d {
7360 unsafe {
7361 static_assert_uimm_bits!(NORM, 4);
7362 static_assert_uimm_bits!(SIGN, 2);
7363 let a: f64x8 = a.as_f64x8();
7364 let zero: f64x8 = f64x8::ZERO;
7365 let r: f64x8 = vgetmantpd(
7366 a,
7367 SIGN << 2 | NORM,
            zero,
            0b11111111,
7370 _MM_FROUND_CUR_DIRECTION,
7371 );
        transmute(r)
7373 }
7374}
7375
7376/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7377/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7378/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7379/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7380/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7381/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7382/// The sign is determined by sc which can take the following values:\
7383/// _MM_MANT_SIGN_src // sign = sign(src)\
7384/// _MM_MANT_SIGN_zero // sign = 0\
7385/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7386///
7387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_pd&expand=2872)
7388#[inline]
7389#[target_feature(enable = "avx512f")]
7390#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7391#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7392#[rustc_legacy_const_generics(3, 4)]
7393pub fn _mm512_mask_getmant_pd<
7394 const NORM: _MM_MANTISSA_NORM_ENUM,
7395 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7396>(
7397 src: __m512d,
7398 k: __mmask8,
7399 a: __m512d,
7400) -> __m512d {
7401 unsafe {
7402 static_assert_uimm_bits!(NORM, 4);
7403 static_assert_uimm_bits!(SIGN, 2);
7404 let a: f64x8 = a.as_f64x8();
7405 let src: f64x8 = src.as_f64x8();
        let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
7408 }
7409}
7410
7411/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7412/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7413/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7414/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7415/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7416/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7417/// The sign is determined by sc which can take the following values:\
7418/// _MM_MANT_SIGN_src // sign = sign(src)\
7419/// _MM_MANT_SIGN_zero // sign = 0\
7420/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7421///
7422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_pd&expand=2873)
7423#[inline]
7424#[target_feature(enable = "avx512f")]
7425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7426#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7427#[rustc_legacy_const_generics(2, 3)]
7428pub fn _mm512_maskz_getmant_pd<
7429 const NORM: _MM_MANTISSA_NORM_ENUM,
7430 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7431>(
7432 k: __mmask8,
7433 a: __m512d,
7434) -> __m512d {
7435 unsafe {
7436 static_assert_uimm_bits!(NORM, 4);
7437 static_assert_uimm_bits!(SIGN, 2);
7438 let a: f64x8 = a.as_f64x8();
7439 let r: f64x8 = vgetmantpd(
7440 a,
7441 SIGN << 2 | NORM,
            f64x8::ZERO,
            k,
7444 _MM_FROUND_CUR_DIRECTION,
7445 );
        transmute(r)
7447 }
7448}
7449
7450/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7451/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7452/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7453/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7454/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7455/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7456/// The sign is determined by sc which can take the following values:\
7457/// _MM_MANT_SIGN_src // sign = sign(src)\
7458/// _MM_MANT_SIGN_zero // sign = 0\
7459/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7460///
7461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_pd&expand=2868)
7462#[inline]
7463#[target_feature(enable = "avx512f,avx512vl")]
7464#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7465#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7466#[rustc_legacy_const_generics(1, 2)]
7467pub fn _mm256_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7468 a: __m256d,
7469) -> __m256d {
7470 unsafe {
7471 static_assert_uimm_bits!(NORM, 4);
7472 static_assert_uimm_bits!(SIGN, 2);
7473 let a: f64x4 = a.as_f64x4();
        let r: f64x4 = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, 0b00001111);
        transmute(r)
7476 }
7477}
7478
7479/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7480/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7481/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7482/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7483/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7484/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7485/// The sign is determined by sc which can take the following values:\
7486/// _MM_MANT_SIGN_src // sign = sign(src)\
7487/// _MM_MANT_SIGN_zero // sign = 0\
7488/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7489///
7490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_pd&expand=2869)
7491#[inline]
7492#[target_feature(enable = "avx512f,avx512vl")]
7493#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7494#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7495#[rustc_legacy_const_generics(3, 4)]
7496pub fn _mm256_mask_getmant_pd<
7497 const NORM: _MM_MANTISSA_NORM_ENUM,
7498 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7499>(
7500 src: __m256d,
7501 k: __mmask8,
7502 a: __m256d,
7503) -> __m256d {
7504 unsafe {
7505 static_assert_uimm_bits!(NORM, 4);
7506 static_assert_uimm_bits!(SIGN, 2);
7507 let a: f64x4 = a.as_f64x4();
7508 let src: f64x4 = src.as_f64x4();
        let r: f64x4 = vgetmantpd256(a, SIGN << 2 | NORM, src, k);
        transmute(r)
7511 }
7512}
7513
7514/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7515/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7516/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7517/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7518/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7519/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7520/// The sign is determined by sc which can take the following values:\
7521/// _MM_MANT_SIGN_src // sign = sign(src)\
7522/// _MM_MANT_SIGN_zero // sign = 0\
7523/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7524///
7525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_pd&expand=2870)
7526#[inline]
7527#[target_feature(enable = "avx512f,avx512vl")]
7528#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7529#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7530#[rustc_legacy_const_generics(2, 3)]
7531pub fn _mm256_maskz_getmant_pd<
7532 const NORM: _MM_MANTISSA_NORM_ENUM,
7533 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7534>(
7535 k: __mmask8,
7536 a: __m256d,
7537) -> __m256d {
7538 unsafe {
7539 static_assert_uimm_bits!(NORM, 4);
7540 static_assert_uimm_bits!(SIGN, 2);
7541 let a: f64x4 = a.as_f64x4();
        let r: f64x4 = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, k);
        transmute(r)
7544 }
7545}
7546
7547/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7548/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7549/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7550/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7551/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7552/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7553/// The sign is determined by sc which can take the following values:\
7554/// _MM_MANT_SIGN_src // sign = sign(src)\
7555/// _MM_MANT_SIGN_zero // sign = 0\
7556/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7557///
7558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_pd&expand=2865)
7559#[inline]
7560#[target_feature(enable = "avx512f,avx512vl")]
7561#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7562#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7563#[rustc_legacy_const_generics(1, 2)]
7564pub fn _mm_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7565 a: __m128d,
7566) -> __m128d {
7567 unsafe {
7568 static_assert_uimm_bits!(NORM, 4);
7569 static_assert_uimm_bits!(SIGN, 2);
7570 let a: f64x2 = a.as_f64x2();
        let r: f64x2 = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, 0b00000011);
        transmute(r)
7573 }
7574}
7575
7576/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7577/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7578/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7579/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7580/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7581/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7582/// The sign is determined by sc which can take the following values:\
7583/// _MM_MANT_SIGN_src // sign = sign(src)\
7584/// _MM_MANT_SIGN_zero // sign = 0\
7585/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7586///
7587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_pd&expand=2866)
7588#[inline]
7589#[target_feature(enable = "avx512f,avx512vl")]
7590#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7591#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7592#[rustc_legacy_const_generics(3, 4)]
7593pub fn _mm_mask_getmant_pd<
7594 const NORM: _MM_MANTISSA_NORM_ENUM,
7595 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7596>(
7597 src: __m128d,
7598 k: __mmask8,
7599 a: __m128d,
7600) -> __m128d {
7601 unsafe {
7602 static_assert_uimm_bits!(NORM, 4);
7603 static_assert_uimm_bits!(SIGN, 2);
7604 let a: f64x2 = a.as_f64x2();
7605 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vgetmantpd128(a, SIGN << 2 | NORM, src, k);
        transmute(r)
7608 }
7609}
7610
7611/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7612/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7613/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7614/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7615/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7616/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7617/// The sign is determined by sc which can take the following values:\
7618/// _MM_MANT_SIGN_src // sign = sign(src)\
7619/// _MM_MANT_SIGN_zero // sign = 0\
7620/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7621///
7622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_pd&expand=2867)
7623#[inline]
7624#[target_feature(enable = "avx512f,avx512vl")]
7625#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7626#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7627#[rustc_legacy_const_generics(2, 3)]
7628pub fn _mm_maskz_getmant_pd<
7629 const NORM: _MM_MANTISSA_NORM_ENUM,
7630 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7631>(
7632 k: __mmask8,
7633 a: __m128d,
7634) -> __m128d {
7635 unsafe {
7636 static_assert_uimm_bits!(NORM, 4);
7637 static_assert_uimm_bits!(SIGN, 2);
7638 let a: f64x2 = a.as_f64x2();
        let r: f64x2 = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, k);
        transmute(r)
7641 }
7642}
7643
7644/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7645///
7646/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7647/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7648/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7649/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7650/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7651/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7652///
7653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_ps&expand=145)
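///
/// A minimal usage sketch, assuming `std` and runtime AVX512F detection are
/// available; the lane values and the expected result below are illustrative
/// only:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use core::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_set1_ps(1.0);
///             let b = _mm512_set1_ps(1.0e-8);
///             // Round the per-lane sums toward +infinity with exceptions suppressed.
///             const RU: i32 = _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
///             let r = _mm512_add_round_ps::<RU>(a, b);
///             // With round-up, 1.0 + 1.0e-8 lands on the next f32 above 1.0
///             // (round-to-nearest would have returned exactly 1.0).
///             let expected = _mm512_set1_ps(f32::from_bits(1.0f32.to_bits() + 1));
///             assert_eq!(_mm512_cmpeq_ps_mask(r, expected), 0xffff);
///         }
///     }
/// }
/// ```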
7654#[inline]
7655#[target_feature(enable = "avx512f")]
7656#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7657#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7658#[rustc_legacy_const_generics(2)]
7659pub fn _mm512_add_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7660 unsafe {
7661 static_assert_rounding!(ROUNDING);
7662 let a: f32x16 = a.as_f32x16();
7663 let b: f32x16 = b.as_f32x16();
7664 let r: f32x16 = vaddps(a, b, ROUNDING);
        transmute(r)
7666 }
7667}
7668
7669/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7670///
7671/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7672/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7673/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7674/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7675/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7677///
7678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_ps&expand=146)
7679#[inline]
7680#[target_feature(enable = "avx512f")]
7681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7682#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7683#[rustc_legacy_const_generics(4)]
7684pub fn _mm512_mask_add_round_ps<const ROUNDING: i32>(
7685 src: __m512,
7686 k: __mmask16,
7687 a: __m512,
7688 b: __m512,
7689) -> __m512 {
7690 unsafe {
7691 static_assert_rounding!(ROUNDING);
7692 let a: f32x16 = a.as_f32x16();
7693 let b: f32x16 = b.as_f32x16();
7694 let r: f32x16 = vaddps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7696 }
7697}
7698
7699/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7700///
7701/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7702/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7703/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7704/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7705/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7706/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7707///
7708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_ps&expand=147)
7709#[inline]
7710#[target_feature(enable = "avx512f")]
7711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7712#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7713#[rustc_legacy_const_generics(3)]
7714pub fn _mm512_maskz_add_round_ps<const ROUNDING: i32>(
7715 k: __mmask16,
7716 a: __m512,
7717 b: __m512,
7718) -> __m512 {
7719 unsafe {
7720 static_assert_rounding!(ROUNDING);
7721 let a: f32x16 = a.as_f32x16();
7722 let b: f32x16 = b.as_f32x16();
7723 let r: f32x16 = vaddps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
7725 }
7726}
7727
7728/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
7729///
7730/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7731/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7732/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7733/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7734/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7735/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7736///
7737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_pd&expand=142)
7738#[inline]
7739#[target_feature(enable = "avx512f")]
7740#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7741#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7742#[rustc_legacy_const_generics(2)]
7743pub fn _mm512_add_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7744 unsafe {
7745 static_assert_rounding!(ROUNDING);
7746 let a: f64x8 = a.as_f64x8();
7747 let b: f64x8 = b.as_f64x8();
7748 let r: f64x8 = vaddpd(a, b, ROUNDING);
        transmute(r)
7750 }
7751}
7752
7753/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7754///
7755/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7756/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7757/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7758/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7759/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7760/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7761///
7762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_pd&expand=143)
7763#[inline]
7764#[target_feature(enable = "avx512f")]
7765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7766#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7767#[rustc_legacy_const_generics(4)]
7768pub fn _mm512_mask_add_round_pd<const ROUNDING: i32>(
7769 src: __m512d,
7770 k: __mmask8,
7771 a: __m512d,
7772 b: __m512d,
7773) -> __m512d {
7774 unsafe {
7775 static_assert_rounding!(ROUNDING);
7776 let a: f64x8 = a.as_f64x8();
7777 let b: f64x8 = b.as_f64x8();
7778 let r: f64x8 = vaddpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7780 }
7781}
7782
7783/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7784///
7785/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7786/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7787/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7788/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7789/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7790/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7791///
7792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_pd&expand=144)
7793#[inline]
7794#[target_feature(enable = "avx512f")]
7795#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7796#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7797#[rustc_legacy_const_generics(3)]
7798pub fn _mm512_maskz_add_round_pd<const ROUNDING: i32>(
7799 k: __mmask8,
7800 a: __m512d,
7801 b: __m512d,
7802) -> __m512d {
7803 unsafe {
7804 static_assert_rounding!(ROUNDING);
7805 let a: f64x8 = a.as_f64x8();
7806 let b: f64x8 = b.as_f64x8();
7807 let r: f64x8 = vaddpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
7809 }
7810}
7811
7812/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
7813///
7814/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7815/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7816/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7817/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7818/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7819/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7820///
7821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_ps&expand=5739)
7822#[inline]
7823#[target_feature(enable = "avx512f")]
7824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7825#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7826#[rustc_legacy_const_generics(2)]
7827pub fn _mm512_sub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7828 unsafe {
7829 static_assert_rounding!(ROUNDING);
7830 let a: f32x16 = a.as_f32x16();
7831 let b: f32x16 = b.as_f32x16();
7832 let r: f32x16 = vsubps(a, b, ROUNDING);
        transmute(r)
7834 }
7835}
7836
7837/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7838///
7839/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7840/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7841/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7842/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7843/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7844/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7845///
7846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_ps&expand=5737)
7847#[inline]
7848#[target_feature(enable = "avx512f")]
7849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7850#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7851#[rustc_legacy_const_generics(4)]
7852pub fn _mm512_mask_sub_round_ps<const ROUNDING: i32>(
7853 src: __m512,
7854 k: __mmask16,
7855 a: __m512,
7856 b: __m512,
7857) -> __m512 {
7858 unsafe {
7859 static_assert_rounding!(ROUNDING);
7860 let a: f32x16 = a.as_f32x16();
7861 let b: f32x16 = b.as_f32x16();
7862 let r: f32x16 = vsubps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7864 }
7865}
7866
7867/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7868///
7869/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7870/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7871/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7872/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7873/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7874/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7875///
7876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_ps&expand=5738)
7877#[inline]
7878#[target_feature(enable = "avx512f")]
7879#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7880#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7881#[rustc_legacy_const_generics(3)]
7882pub fn _mm512_maskz_sub_round_ps<const ROUNDING: i32>(
7883 k: __mmask16,
7884 a: __m512,
7885 b: __m512,
7886) -> __m512 {
7887 unsafe {
7888 static_assert_rounding!(ROUNDING);
7889 let a: f32x16 = a.as_f32x16();
7890 let b: f32x16 = b.as_f32x16();
7891 let r: f32x16 = vsubps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
7893 }
7894}
7895
7896/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
7897///
7898/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7899/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7900/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7901/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7902/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7903/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7904///
7905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_pd&expand=5736)
7906#[inline]
7907#[target_feature(enable = "avx512f")]
7908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7909#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7910#[rustc_legacy_const_generics(2)]
7911pub fn _mm512_sub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7912 unsafe {
7913 static_assert_rounding!(ROUNDING);
7914 let a: f64x8 = a.as_f64x8();
7915 let b: f64x8 = b.as_f64x8();
7916 let r: f64x8 = vsubpd(a, b, ROUNDING);
        transmute(r)
7918 }
7919}
7920
7921/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7922///
7923/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7924/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7925/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7926/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7927/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7928/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7929///
7930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_pd&expand=5734)
7931#[inline]
7932#[target_feature(enable = "avx512f")]
7933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7934#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7935#[rustc_legacy_const_generics(4)]
7936pub fn _mm512_mask_sub_round_pd<const ROUNDING: i32>(
7937 src: __m512d,
7938 k: __mmask8,
7939 a: __m512d,
7940 b: __m512d,
7941) -> __m512d {
7942 unsafe {
7943 static_assert_rounding!(ROUNDING);
7944 let a: f64x8 = a.as_f64x8();
7945 let b: f64x8 = b.as_f64x8();
7946 let r: f64x8 = vsubpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7948 }
7949}
7950
7951/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7952///
7953/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7954/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7955/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7956/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7957/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7958/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7959///
7960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_pd&expand=5735)
7961#[inline]
7962#[target_feature(enable = "avx512f")]
7963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7964#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7965#[rustc_legacy_const_generics(3)]
7966pub fn _mm512_maskz_sub_round_pd<const ROUNDING: i32>(
7967 k: __mmask8,
7968 a: __m512d,
7969 b: __m512d,
7970) -> __m512d {
7971 unsafe {
7972 static_assert_rounding!(ROUNDING);
7973 let a: f64x8 = a.as_f64x8();
7974 let b: f64x8 = b.as_f64x8();
7975 let r: f64x8 = vsubpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
7977 }
7978}
7979
7980/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7981///
7982/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7983/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7984/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7985/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7986/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7987/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7988///
7989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_ps&expand=3940)
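///
/// A minimal sketch (values are illustrative; AVX-512F support is assumed to have
/// been verified by the caller, hence the `unsafe` block):
///
/// ```ignore
/// let r = unsafe {
///     let a = _mm512_set1_ps(1.5);
///     let b = _mm512_set1_ps(3.0);
///     // Defer to whatever rounding mode MXCSR.RC currently selects.
///     _mm512_mul_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b)
/// };
/// ```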
7990#[inline]
7991#[target_feature(enable = "avx512f")]
7992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7993#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
7994#[rustc_legacy_const_generics(2)]
7995pub fn _mm512_mul_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7996 unsafe {
7997 static_assert_rounding!(ROUNDING);
7998 let a: f32x16 = a.as_f32x16();
7999 let b: f32x16 = b.as_f32x16();
8000 let r: f32x16 = vmulps(a, b, ROUNDING);
        transmute(r)
8002 }
8003}
8004
8005/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8006///
8007/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8008/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8009/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8010/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8011/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8012/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8013///
8014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_ps&expand=3938)
8015#[inline]
8016#[target_feature(enable = "avx512f")]
8017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8018#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8019#[rustc_legacy_const_generics(4)]
8020pub fn _mm512_mask_mul_round_ps<const ROUNDING: i32>(
8021 src: __m512,
8022 k: __mmask16,
8023 a: __m512,
8024 b: __m512,
8025) -> __m512 {
8026 unsafe {
8027 static_assert_rounding!(ROUNDING);
8028 let a: f32x16 = a.as_f32x16();
8029 let b: f32x16 = b.as_f32x16();
8030 let r: f32x16 = vmulps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8032 }
8033}
8034
8035/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8036///
8037/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8038/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8039/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8040/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8041/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8042/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8043///
8044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_ps&expand=3939)
8045#[inline]
8046#[target_feature(enable = "avx512f")]
8047#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8048#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8049#[rustc_legacy_const_generics(3)]
8050pub fn _mm512_maskz_mul_round_ps<const ROUNDING: i32>(
8051 k: __mmask16,
8052 a: __m512,
8053 b: __m512,
8054) -> __m512 {
8055 unsafe {
8056 static_assert_rounding!(ROUNDING);
8057 let a: f32x16 = a.as_f32x16();
8058 let b: f32x16 = b.as_f32x16();
8059 let r: f32x16 = vmulps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8061 }
8062}
8063
8064/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
8065///
8066/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8067/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8068/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8069/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8070/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8071/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8072///
8073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_pd&expand=3937)
8074#[inline]
8075#[target_feature(enable = "avx512f")]
8076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8077#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8078#[rustc_legacy_const_generics(2)]
8079pub fn _mm512_mul_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8080 unsafe {
8081 static_assert_rounding!(ROUNDING);
8082 let a: f64x8 = a.as_f64x8();
8083 let b: f64x8 = b.as_f64x8();
8084 let r: f64x8 = vmulpd(a, b, ROUNDING);
        transmute(r)
8086 }
8087}
8088
8089/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8090///
8091/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8092/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8093/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8094/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8095/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8096/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8097///
8098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_pd&expand=3935)
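///
/// A sketch of the writemask behaviour (illustrative values; AVX-512F support is
/// assumed to have been checked by the caller):
///
/// ```ignore
/// let r = unsafe {
///     let src = _mm512_set1_pd(-1.0);
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(4.0);
///     // Lanes 0..=3 receive a * b (round to nearest); lanes 4..=7 keep the values from `src`.
///     _mm512_mask_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(src, 0b0000_1111, a, b)
/// };
/// ```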
8099#[inline]
8100#[target_feature(enable = "avx512f")]
8101#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8102#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8103#[rustc_legacy_const_generics(4)]
8104pub fn _mm512_mask_mul_round_pd<const ROUNDING: i32>(
8105 src: __m512d,
8106 k: __mmask8,
8107 a: __m512d,
8108 b: __m512d,
8109) -> __m512d {
8110 unsafe {
8111 static_assert_rounding!(ROUNDING);
8112 let a: f64x8 = a.as_f64x8();
8113 let b: f64x8 = b.as_f64x8();
8114 let r: f64x8 = vmulpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8116 }
8117}
8118
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8120///
8121/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8122/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8123/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8124/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8125/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8126/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8127///
8128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_pd&expand=3939)
8129#[inline]
8130#[target_feature(enable = "avx512f")]
8131#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8132#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8133#[rustc_legacy_const_generics(3)]
8134pub fn _mm512_maskz_mul_round_pd<const ROUNDING: i32>(
8135 k: __mmask8,
8136 a: __m512d,
8137 b: __m512d,
8138) -> __m512d {
8139 unsafe {
8140 static_assert_rounding!(ROUNDING);
8141 let a: f64x8 = a.as_f64x8();
8142 let b: f64x8 = b.as_f64x8();
8143 let r: f64x8 = vmulpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8145 }
8146}
8147
8148/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8149///
8150/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8151/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8152/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8153/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8154/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8156///
8157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_ps&expand=2168)
8158#[inline]
8159#[target_feature(enable = "avx512f")]
8160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8161#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8162#[rustc_legacy_const_generics(2)]
8163pub fn _mm512_div_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8164 unsafe {
8165 static_assert_rounding!(ROUNDING);
8166 let a: f32x16 = a.as_f32x16();
8167 let b: f32x16 = b.as_f32x16();
8168 let r: f32x16 = vdivps(a, b, ROUNDING);
        transmute(r)
8170 }
8171}
8172
8173/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8174///
8175/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8176/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8177/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8178/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8179/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8180/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8181///
8182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_ps&expand=2169)
8183#[inline]
8184#[target_feature(enable = "avx512f")]
8185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8186#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8187#[rustc_legacy_const_generics(4)]
8188pub fn _mm512_mask_div_round_ps<const ROUNDING: i32>(
8189 src: __m512,
8190 k: __mmask16,
8191 a: __m512,
8192 b: __m512,
8193) -> __m512 {
8194 unsafe {
8195 static_assert_rounding!(ROUNDING);
8196 let a: f32x16 = a.as_f32x16();
8197 let b: f32x16 = b.as_f32x16();
8198 let r: f32x16 = vdivps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8200 }
8201}
8202
8203/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8204///
8205/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8206/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8207/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8208/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8209/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8210/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8211///
8212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_ps&expand=2170)
8213#[inline]
8214#[target_feature(enable = "avx512f")]
8215#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8216#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8217#[rustc_legacy_const_generics(3)]
8218pub fn _mm512_maskz_div_round_ps<const ROUNDING: i32>(
8219 k: __mmask16,
8220 a: __m512,
8221 b: __m512,
8222) -> __m512 {
8223 unsafe {
8224 static_assert_rounding!(ROUNDING);
8225 let a: f32x16 = a.as_f32x16();
8226 let b: f32x16 = b.as_f32x16();
8227 let r: f32x16 = vdivps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8229 }
8230}
8231
/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8233///
8234/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8235/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8236/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8237/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8238/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8239/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8240///
8241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_pd&expand=2165)
8242#[inline]
8243#[target_feature(enable = "avx512f")]
8244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8245#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8246#[rustc_legacy_const_generics(2)]
8247pub fn _mm512_div_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8248 unsafe {
8249 static_assert_rounding!(ROUNDING);
8250 let a: f64x8 = a.as_f64x8();
8251 let b: f64x8 = b.as_f64x8();
8252 let r: f64x8 = vdivpd(a, b, ROUNDING);
        transmute(r)
8254 }
8255}
8256
8257/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8258///
8259/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8260/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8261/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8262/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8263/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8264/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8265///
8266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_pd&expand=2166)
8267#[inline]
8268#[target_feature(enable = "avx512f")]
8269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8270#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8271#[rustc_legacy_const_generics(4)]
8272pub fn _mm512_mask_div_round_pd<const ROUNDING: i32>(
8273 src: __m512d,
8274 k: __mmask8,
8275 a: __m512d,
8276 b: __m512d,
8277) -> __m512d {
8278 unsafe {
8279 static_assert_rounding!(ROUNDING);
8280 let a: f64x8 = a.as_f64x8();
8281 let b: f64x8 = b.as_f64x8();
8282 let r: f64x8 = vdivpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8284 }
8285}
8286
8287/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8288///
8289/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8290/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8291/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8292/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8293/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8294/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8295///
8296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_pd&expand=2167)
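///
/// A sketch of the zeromask behaviour (illustrative values; AVX-512F support is
/// assumed to have been checked by the caller):
///
/// ```ignore
/// let r = unsafe {
///     let a = _mm512_set1_pd(1.0);
///     let b = _mm512_set1_pd(8.0);
///     // Lanes 0..=3 hold a / b rounded toward zero; lanes 4..=7 are zeroed.
///     _mm512_maskz_div_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b0000_1111, a, b)
/// };
/// ```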
8297#[inline]
8298#[target_feature(enable = "avx512f")]
8299#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8300#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8301#[rustc_legacy_const_generics(3)]
8302pub fn _mm512_maskz_div_round_pd<const ROUNDING: i32>(
8303 k: __mmask8,
8304 a: __m512d,
8305 b: __m512d,
8306) -> __m512d {
8307 unsafe {
8308 static_assert_rounding!(ROUNDING);
8309 let a: f64x8 = a.as_f64x8();
8310 let b: f64x8 = b.as_f64x8();
8311 let r: f64x8 = vdivpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8313 }
8314}
8315
8316/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
8317///
8318/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8319/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8320/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8321/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8322/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8323/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8324///
8325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_ps&expand=5377)
8326#[inline]
8327#[target_feature(enable = "avx512f")]
8328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8329#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8330#[rustc_legacy_const_generics(1)]
8331pub fn _mm512_sqrt_round_ps<const ROUNDING: i32>(a: __m512) -> __m512 {
8332 unsafe {
8333 static_assert_rounding!(ROUNDING);
8334 let a: f32x16 = a.as_f32x16();
8335 let r: f32x16 = vsqrtps(a, ROUNDING);
        transmute(r)
8337 }
8338}
8339
8340/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8341///
8342/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8343/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8344/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8345/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8346/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8347/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8348///
8349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_ps&expand=5375)
8350#[inline]
8351#[target_feature(enable = "avx512f")]
8352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8353#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8354#[rustc_legacy_const_generics(3)]
8355pub fn _mm512_mask_sqrt_round_ps<const ROUNDING: i32>(
8356 src: __m512,
8357 k: __mmask16,
8358 a: __m512,
8359) -> __m512 {
8360 unsafe {
8361 static_assert_rounding!(ROUNDING);
8362 let a: f32x16 = a.as_f32x16();
8363 let r: f32x16 = vsqrtps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8365 }
8366}
8367
8368/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8369///
8370/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8371/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8372/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8373/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8374/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8375/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8376///
8377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_ps&expand=5376)
8378#[inline]
8379#[target_feature(enable = "avx512f")]
8380#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8381#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8382#[rustc_legacy_const_generics(2)]
8383pub fn _mm512_maskz_sqrt_round_ps<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512 {
8384 unsafe {
8385 static_assert_rounding!(ROUNDING);
8386 let a: f32x16 = a.as_f32x16();
8387 let r: f32x16 = vsqrtps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8389 }
8390}
8391
8392/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
8393///
8394/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8395/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8396/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8397/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8398/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8399/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8400///
8401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_pd&expand=5374)
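///
/// A brief usage sketch (illustrative value; the caller is assumed to have verified
/// AVX-512F support):
///
/// ```ignore
/// let r = unsafe {
///     let a = _mm512_set1_pd(2.0);
///     // Square root of each lane, rounded toward positive infinity, exceptions suppressed.
///     _mm512_sqrt_round_pd::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a)
/// };
/// ```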
8402#[inline]
8403#[target_feature(enable = "avx512f")]
8404#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8405#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8406#[rustc_legacy_const_generics(1)]
8407pub fn _mm512_sqrt_round_pd<const ROUNDING: i32>(a: __m512d) -> __m512d {
8408 unsafe {
8409 static_assert_rounding!(ROUNDING);
8410 let a: f64x8 = a.as_f64x8();
8411 let r: f64x8 = vsqrtpd(a, ROUNDING);
        transmute(r)
8413 }
8414}
8415
8416/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8417///
8418/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8419/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8420/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8421/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8422/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8423/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8424///
8425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_pd&expand=5372)
8426#[inline]
8427#[target_feature(enable = "avx512f")]
8428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8429#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8430#[rustc_legacy_const_generics(3)]
8431pub fn _mm512_mask_sqrt_round_pd<const ROUNDING: i32>(
8432 src: __m512d,
8433 k: __mmask8,
8434 a: __m512d,
8435) -> __m512d {
8436 unsafe {
8437 static_assert_rounding!(ROUNDING);
8438 let a: f64x8 = a.as_f64x8();
8439 let r: f64x8 = vsqrtpd(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8441 }
8442}
8443
8444/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8445///
8446/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8447/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8448/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8449/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8450/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8451/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8452///
8453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_pd&expand=5373)
8454#[inline]
8455#[target_feature(enable = "avx512f")]
8456#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8457#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8458#[rustc_legacy_const_generics(2)]
8459pub fn _mm512_maskz_sqrt_round_pd<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512d {
8460 unsafe {
8461 static_assert_rounding!(ROUNDING);
8462 let a: f64x8 = a.as_f64x8();
8463 let r: f64x8 = vsqrtpd(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8465 }
8466}
8467
8468/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8469///
8470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8471/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8472/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8473/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8474/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8476///
8477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_ps&expand=2565)
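///
/// A minimal sketch (illustrative values; AVX-512F support is assumed to have been
/// verified by the caller):
///
/// ```ignore
/// let r = unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Each lane computes a * b + c with a single rounding step (round to nearest here).
///     _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
/// };
/// ```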
8478#[inline]
8479#[target_feature(enable = "avx512f")]
8480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8481#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8482#[rustc_legacy_const_generics(3)]
8483pub fn _mm512_fmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8484 unsafe {
8485 static_assert_rounding!(ROUNDING);
8486 vfmadd132psround(a, b, c, ROUNDING)
8487 }
8488}
8489
8490/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8491///
8492/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8493/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8494/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8495/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8496/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8497/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8498///
8499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_ps&expand=2566)
8500#[inline]
8501#[target_feature(enable = "avx512f")]
8502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8503#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8504#[rustc_legacy_const_generics(4)]
8505pub fn _mm512_mask_fmadd_round_ps<const ROUNDING: i32>(
8506 a: __m512,
8507 k: __mmask16,
8508 b: __m512,
8509 c: __m512,
8510) -> __m512 {
8511 unsafe {
8512 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), a)
8514 }
8515}
8516
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8518///
8519/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8520/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8521/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8522/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8523/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8524/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8525///
8526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_ps&expand=2568)
8527#[inline]
8528#[target_feature(enable = "avx512f")]
8529#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8530#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8531#[rustc_legacy_const_generics(4)]
8532pub fn _mm512_maskz_fmadd_round_ps<const ROUNDING: i32>(
8533 k: __mmask16,
8534 a: __m512,
8535 b: __m512,
8536 c: __m512,
8537) -> __m512 {
8538 unsafe {
8539 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), _mm512_setzero_ps())
8541 }
8542}
8543
8544/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8545///
8546/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8547/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8548/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8549/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8550/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8551/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8552///
8553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_ps&expand=2567)
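///
/// A sketch of the `mask3` variant, where unselected lanes come from `c`
/// (illustrative values; AVX-512F support is assumed):
///
/// ```ignore
/// let r = unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(10.0);
///     // The low 8 lanes receive a * b + c; the high 8 lanes keep the values from `c`.
///     _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c, 0x00FF)
/// };
/// ```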
8554#[inline]
8555#[target_feature(enable = "avx512f")]
8556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8557#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8558#[rustc_legacy_const_generics(4)]
8559pub fn _mm512_mask3_fmadd_round_ps<const ROUNDING: i32>(
8560 a: __m512,
8561 b: __m512,
8562 c: __m512,
8563 k: __mmask16,
8564) -> __m512 {
8565 unsafe {
8566 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), c)
8568 }
8569}
8570
8571/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8572///
8573/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8574/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8575/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8576/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8577/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8578/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8579///
8580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_pd&expand=2561)
8581#[inline]
8582#[target_feature(enable = "avx512f")]
8583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8584#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8585#[rustc_legacy_const_generics(3)]
8586pub fn _mm512_fmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
8587 unsafe {
8588 static_assert_rounding!(ROUNDING);
8589 vfmadd132pdround(a, b, c, ROUNDING)
8590 }
8591}
8592
8593/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8594///
8595/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8596/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8597/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8598/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8599/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8600/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8601///
8602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_pd&expand=2562)
8603#[inline]
8604#[target_feature(enable = "avx512f")]
8605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8606#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8607#[rustc_legacy_const_generics(4)]
8608pub fn _mm512_mask_fmadd_round_pd<const ROUNDING: i32>(
8609 a: __m512d,
8610 k: __mmask8,
8611 b: __m512d,
8612 c: __m512d,
8613) -> __m512d {
8614 unsafe {
8615 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), a)
8617 }
8618}
8619
8620/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8621///
8622/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8623/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8624/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8625/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8626/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8627/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8628///
8629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_pd&expand=2564)
8630#[inline]
8631#[target_feature(enable = "avx512f")]
8632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8633#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8634#[rustc_legacy_const_generics(4)]
8635pub fn _mm512_maskz_fmadd_round_pd<const ROUNDING: i32>(
8636 k: __mmask8,
8637 a: __m512d,
8638 b: __m512d,
8639 c: __m512d,
8640) -> __m512d {
8641 unsafe {
8642 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), _mm512_setzero_pd())
8644 }
8645}
8646
8647/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8648///
8649/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8650/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8651/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8652/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8653/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8654/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8655///
8656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_pd&expand=2563)
8657#[inline]
8658#[target_feature(enable = "avx512f")]
8659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8660#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8661#[rustc_legacy_const_generics(4)]
8662pub fn _mm512_mask3_fmadd_round_pd<const ROUNDING: i32>(
8663 a: __m512d,
8664 b: __m512d,
8665 c: __m512d,
8666 k: __mmask8,
8667) -> __m512d {
8668 unsafe {
8669 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), c)
8671 }
8672}
8673
8674/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8675///
8676/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8677/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8678/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8679/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8680/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8681/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8682///
8683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_ps&expand=2651)
8684#[inline]
8685#[target_feature(enable = "avx512f")]
8686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8687#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8688#[rustc_legacy_const_generics(3)]
8689pub fn _mm512_fmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8690 unsafe {
8691 static_assert_rounding!(ROUNDING);
        vfmadd132psround(a, b, simd_neg(c), ROUNDING)
8693 }
8694}
8695
8696/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8697///
8698/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8699/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8700/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8701/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8702/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8703/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8704///
8705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_ps&expand=2652)
8706#[inline]
8707#[target_feature(enable = "avx512f")]
8708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8709#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8710#[rustc_legacy_const_generics(4)]
8711pub fn _mm512_mask_fmsub_round_ps<const ROUNDING: i32>(
8712 a: __m512,
8713 k: __mmask16,
8714 b: __m512,
8715 c: __m512,
8716) -> __m512 {
8717 unsafe {
8718 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
8721 }
8722}
8723
8724/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8725///
8726/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8727/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8728/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8729/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8730/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8731/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8732///
8733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_ps&expand=2654)
8734#[inline]
8735#[target_feature(enable = "avx512f")]
8736#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8737#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8738#[rustc_legacy_const_generics(4)]
8739pub fn _mm512_maskz_fmsub_round_ps<const ROUNDING: i32>(
8740 k: __mmask16,
8741 a: __m512,
8742 b: __m512,
8743 c: __m512,
8744) -> __m512 {
8745 unsafe {
8746 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
8749 }
8750}
8751
8752/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8753///
8754/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8755/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8756/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8757/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8758/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8759/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8760///
8761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_ps&expand=2653)
8762#[inline]
8763#[target_feature(enable = "avx512f")]
8764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8765#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8766#[rustc_legacy_const_generics(4)]
8767pub fn _mm512_mask3_fmsub_round_ps<const ROUNDING: i32>(
8768 a: __m512,
8769 b: __m512,
8770 c: __m512,
8771 k: __mmask16,
8772) -> __m512 {
8773 unsafe {
8774 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
8777 }
8778}
8779
8780/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8781///
8782/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8783/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8784/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8785/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8786/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8787/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8788///
8789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_pd&expand=2647)
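///
/// A minimal sketch (illustrative values; AVX-512F support is assumed to have been
/// verified by the caller):
///
/// ```ignore
/// let r = unsafe {
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     // Each lane computes a * b - c with a single rounding step (toward zero here).
///     _mm512_fmsub_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
/// };
/// ```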
8790#[inline]
8791#[target_feature(enable = "avx512f")]
8792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8793#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8794#[rustc_legacy_const_generics(3)]
8795pub fn _mm512_fmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
8796 unsafe {
8797 static_assert_rounding!(ROUNDING);
        vfmadd132pdround(a, b, simd_neg(c), ROUNDING)
8799 }
8800}
8801
8802/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8803///
8804/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8805/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8806/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8807/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8808/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8809/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8810///
8811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_pd&expand=2648)
8812#[inline]
8813#[target_feature(enable = "avx512f")]
8814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8815#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8816#[rustc_legacy_const_generics(4)]
8817pub fn _mm512_mask_fmsub_round_pd<const ROUNDING: i32>(
8818 a: __m512d,
8819 k: __mmask8,
8820 b: __m512d,
8821 c: __m512d,
8822) -> __m512d {
8823 unsafe {
8824 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
8827 }
8828}
8829
8830/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8831///
8832/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8833/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8834/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8835/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8836/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8837/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8838///
8839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_pd&expand=2650)
8840#[inline]
8841#[target_feature(enable = "avx512f")]
8842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8843#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8844#[rustc_legacy_const_generics(4)]
8845pub fn _mm512_maskz_fmsub_round_pd<const ROUNDING: i32>(
8846 k: __mmask8,
8847 a: __m512d,
8848 b: __m512d,
8849 c: __m512d,
8850) -> __m512d {
8851 unsafe {
8852 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
8855 }
8856}
8857
8858/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8859///
8860/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8861/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8862/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8863/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8864/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8865/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8866///
8867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_pd&expand=2649)
8868#[inline]
8869#[target_feature(enable = "avx512f")]
8870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8871#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8872#[rustc_legacy_const_generics(4)]
8873pub fn _mm512_mask3_fmsub_round_pd<const ROUNDING: i32>(
8874 a: __m512d,
8875 b: __m512d,
8876 c: __m512d,
8877 k: __mmask8,
8878) -> __m512d {
8879 unsafe {
8880 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
8883 }
8884}
8885
8886/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8887///
8888/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8889/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8890/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8891/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8892/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8893/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8894///
8895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_ps&expand=2619)
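///
/// A minimal sketch (illustrative values; AVX-512F support is assumed to have been
/// verified by the caller):
///
/// ```ignore
/// let r = unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Even-indexed lanes compute a * b - c, odd-indexed lanes a * b + c, rounded to nearest.
///     _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
/// };
/// ```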
8896#[inline]
8897#[target_feature(enable = "avx512f")]
8898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8899#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8900#[rustc_legacy_const_generics(3)]
8901pub fn _mm512_fmaddsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8902 unsafe {
8903 static_assert_rounding!(ROUNDING);
8904 vfmaddsubpsround(a, b, c, ROUNDING)
8905 }
8906}
8907
8908/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8909///
8910/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8911/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8912/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8913/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8914/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8915/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8916///
8917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_ps&expand=2620)
8918#[inline]
8919#[target_feature(enable = "avx512f")]
8920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8921#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8922#[rustc_legacy_const_generics(4)]
8923pub fn _mm512_mask_fmaddsub_round_ps<const ROUNDING: i32>(
8924 a: __m512,
8925 k: __mmask16,
8926 b: __m512,
8927 c: __m512,
8928) -> __m512 {
8929 unsafe {
8930 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), a)
8932 }
8933}
8934
8935/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8936///
8937/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8938/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8939/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8940/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8941/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8942/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8943///
8944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_ps&expand=2622)
8945#[inline]
8946#[target_feature(enable = "avx512f")]
8947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8948#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8949#[rustc_legacy_const_generics(4)]
8950pub fn _mm512_maskz_fmaddsub_round_ps<const ROUNDING: i32>(
8951 k: __mmask16,
8952 a: __m512,
8953 b: __m512,
8954 c: __m512,
8955) -> __m512 {
8956 unsafe {
8957 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), _mm512_setzero_ps())
8959 }
8960}
8961
8962/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8963///
8964/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8965/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8966/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8967/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8968/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8969/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8970///
8971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_ps&expand=2621)
8972#[inline]
8973#[target_feature(enable = "avx512f")]
8974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8975#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8976#[rustc_legacy_const_generics(4)]
8977pub fn _mm512_mask3_fmaddsub_round_ps<const ROUNDING: i32>(
8978 a: __m512,
8979 b: __m512,
8980 c: __m512,
8981 k: __mmask16,
8982) -> __m512 {
8983 unsafe {
8984 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), c)
8986 }
8987}
8988
8989/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8990///
8991/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8992/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8993/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8994/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8995/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8996/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8997///
8998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_pd&expand=2615)
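///
/// A sketch of passing an explicit rounding mode as the const generic
/// (illustrative helper and values, not from Intel's documentation; assumes
/// `avx512f` has been verified at runtime):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     // Round toward +infinity; these inputs are exactly representable, so
///     // the chosen mode does not change the numerical result here.
///     let r = _mm512_fmaddsub_round_pd::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b, c);
///     let mut out = [0.0f64; 8];
///     unsafe { _mm512_storeu_pd(out.as_mut_ptr(), r) };
///     assert_eq!(out[0], 5.0); // even lane: 2.0 * 3.0 - 1.0
///     assert_eq!(out[1], 7.0); // odd lane:  2.0 * 3.0 + 1.0
/// }
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe { demo() }; // SAFETY: `avx512f` verified at runtime.
/// }
/// ```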
8999#[inline]
9000#[target_feature(enable = "avx512f")]
9001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9002#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9003#[rustc_legacy_const_generics(3)]
9004pub fn _mm512_fmaddsub_round_pd<const ROUNDING: i32>(
9005 a: __m512d,
9006 b: __m512d,
9007 c: __m512d,
9008) -> __m512d {
9009 unsafe {
9010 static_assert_rounding!(ROUNDING);
9011 vfmaddsubpdround(a, b, c, ROUNDING)
9012 }
9013}
9014
9015/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9016///
9017/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9018/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9019/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9020/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9021/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9022/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9023///
9024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_pd&expand=2616)
9025#[inline]
9026#[target_feature(enable = "avx512f")]
9027#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9028#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9029#[rustc_legacy_const_generics(4)]
9030pub fn _mm512_mask_fmaddsub_round_pd<const ROUNDING: i32>(
9031 a: __m512d,
9032 k: __mmask8,
9033 b: __m512d,
9034 c: __m512d,
9035) -> __m512d {
9036 unsafe {
9037 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), a)
9039 }
9040}
9041
9042/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9043///
9044/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9045/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9046/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9047/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9048/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9049/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9050///
9051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_pd&expand=2618)
9052#[inline]
9053#[target_feature(enable = "avx512f")]
9054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9055#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9056#[rustc_legacy_const_generics(4)]
9057pub fn _mm512_maskz_fmaddsub_round_pd<const ROUNDING: i32>(
9058 k: __mmask8,
9059 a: __m512d,
9060 b: __m512d,
9061 c: __m512d,
9062) -> __m512d {
9063 unsafe {
9064 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), _mm512_setzero_pd())
9066 }
9067}
9068
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9070///
9071/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9072/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9073/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9074/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9075/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9076/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9077///
9078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_pd&expand=2617)
9079#[inline]
9080#[target_feature(enable = "avx512f")]
9081#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9082#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9083#[rustc_legacy_const_generics(4)]
9084pub fn _mm512_mask3_fmaddsub_round_pd<const ROUNDING: i32>(
9085 a: __m512d,
9086 b: __m512d,
9087 c: __m512d,
9088 k: __mmask8,
9089) -> __m512d {
9090 unsafe {
9091 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), c)
9093 }
9094}
9095
9096/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9097///
9098/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9099/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9100/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9101/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9102/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9103/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9104///
9105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_ps&expand=2699)
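///
/// An illustrative sketch contrasting the add/subtract order with `fmaddsub`
/// (helper name and values are made up; assumes `avx512f` is available):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     assert_eq!(out[0], 7.0); // even lane: 2.0 * 3.0 + 1.0
///     assert_eq!(out[1], 5.0); // odd lane:  2.0 * 3.0 - 1.0
/// }
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe { demo() }; // SAFETY: `avx512f` verified at runtime.
/// }
/// ```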
9106#[inline]
9107#[target_feature(enable = "avx512f")]
9108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9109#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9110#[rustc_legacy_const_generics(3)]
9111pub fn _mm512_fmsubadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9112 unsafe {
9113 static_assert_rounding!(ROUNDING);
        vfmaddsubpsround(a, b, simd_neg(c), ROUNDING)
9115 }
9116}
9117
9118/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9119///
9120/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9121/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9122/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9123/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9124/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9125/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9126///
9127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_ps&expand=2700)
9128#[inline]
9129#[target_feature(enable = "avx512f")]
9130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9131#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9132#[rustc_legacy_const_generics(4)]
9133pub fn _mm512_mask_fmsubadd_round_ps<const ROUNDING: i32>(
9134 a: __m512,
9135 k: __mmask16,
9136 b: __m512,
9137 c: __m512,
9138) -> __m512 {
9139 unsafe {
9140 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
9143 }
9144}
9145
9146/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9147///
9148/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9149/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9150/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9151/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9152/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9153/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9154///
9155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_ps&expand=2702)
9156#[inline]
9157#[target_feature(enable = "avx512f")]
9158#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9159#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9160#[rustc_legacy_const_generics(4)]
9161pub fn _mm512_maskz_fmsubadd_round_ps<const ROUNDING: i32>(
9162 k: __mmask16,
9163 a: __m512,
9164 b: __m512,
9165 c: __m512,
9166) -> __m512 {
9167 unsafe {
9168 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
9171 }
9172}
9173
9174/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9175///
9176/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9177/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9178/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9179/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9180/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9181/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9182///
9183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_ps&expand=2701)
9184#[inline]
9185#[target_feature(enable = "avx512f")]
9186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9187#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9188#[rustc_legacy_const_generics(4)]
9189pub fn _mm512_mask3_fmsubadd_round_ps<const ROUNDING: i32>(
9190 a: __m512,
9191 b: __m512,
9192 c: __m512,
9193 k: __mmask16,
9194) -> __m512 {
9195 unsafe {
9196 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
9199 }
9200}
9201
9202/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9203///
9204/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9205/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9206/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9207/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9208/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9209/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9210///
9211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_pd&expand=2695)
9212#[inline]
9213#[target_feature(enable = "avx512f")]
9214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9215#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9216#[rustc_legacy_const_generics(3)]
9217pub fn _mm512_fmsubadd_round_pd<const ROUNDING: i32>(
9218 a: __m512d,
9219 b: __m512d,
9220 c: __m512d,
9221) -> __m512d {
9222 unsafe {
9223 static_assert_rounding!(ROUNDING);
        vfmaddsubpdround(a, b, simd_neg(c), ROUNDING)
9225 }
9226}
9227
9228/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9229///
9230/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9231/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9232/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9233/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9234/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9235/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9236///
9237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_pd&expand=2696)
9238#[inline]
9239#[target_feature(enable = "avx512f")]
9240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9241#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9242#[rustc_legacy_const_generics(4)]
9243pub fn _mm512_mask_fmsubadd_round_pd<const ROUNDING: i32>(
9244 a: __m512d,
9245 k: __mmask8,
9246 b: __m512d,
9247 c: __m512d,
9248) -> __m512d {
9249 unsafe {
9250 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
9253 }
9254}
9255
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9257///
9258/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9259/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9260/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9261/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9262/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9263/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9264///
9265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_pd&expand=2698)
9266#[inline]
9267#[target_feature(enable = "avx512f")]
9268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9269#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9270#[rustc_legacy_const_generics(4)]
9271pub fn _mm512_maskz_fmsubadd_round_pd<const ROUNDING: i32>(
9272 k: __mmask8,
9273 a: __m512d,
9274 b: __m512d,
9275 c: __m512d,
9276) -> __m512d {
9277 unsafe {
9278 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
9281 }
9282}
9283
9284/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9285///
9286/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9287/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9288/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9289/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9290/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9291/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9292///
9293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_pd&expand=2697)
9294#[inline]
9295#[target_feature(enable = "avx512f")]
9296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9297#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9298#[rustc_legacy_const_generics(4)]
9299pub fn _mm512_mask3_fmsubadd_round_pd<const ROUNDING: i32>(
9300 a: __m512d,
9301 b: __m512d,
9302 c: __m512d,
9303 k: __mmask8,
9304) -> __m512d {
9305 unsafe {
9306 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
9309 }
9310}
9311
9312/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9313///
9314/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9315/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9316/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9317/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9318/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9319/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9320///
9321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_ps&expand=2731)
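///
/// A small sketch of the negated-multiply-add behaviour (illustrative helper
/// and inputs; assumes `avx512f` has been detected at runtime):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     let r = _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     // Every lane is -(2.0 * 3.0) + 1.0 == -5.0.
///     assert!(out.iter().all(|&x| x == -5.0));
/// }
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe { demo() }; // SAFETY: `avx512f` verified at runtime.
/// }
/// ```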
9322#[inline]
9323#[target_feature(enable = "avx512f")]
9324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9325#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9326#[rustc_legacy_const_generics(3)]
9327pub fn _mm512_fnmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9328 unsafe {
9329 static_assert_rounding!(ROUNDING);
        vfmadd132psround(simd_neg(a), b, c, ROUNDING)
9331 }
9332}
9333
9334/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9335///
9336/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9337/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9338/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9339/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9340/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9341/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9342///
9343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_ps&expand=2732)
9344#[inline]
9345#[target_feature(enable = "avx512f")]
9346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9347#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9348#[rustc_legacy_const_generics(4)]
9349pub fn _mm512_mask_fnmadd_round_ps<const ROUNDING: i32>(
9350 a: __m512,
9351 k: __mmask16,
9352 b: __m512,
9353 c: __m512,
9354) -> __m512 {
9355 unsafe {
9356 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, a)
9359 }
9360}
9361
9362/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9363///
9364/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9365/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9366/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9367/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9368/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9369/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9370///
9371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_ps&expand=2734)
9372#[inline]
9373#[target_feature(enable = "avx512f")]
9374#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9375#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9376#[rustc_legacy_const_generics(4)]
9377pub fn _mm512_maskz_fnmadd_round_ps<const ROUNDING: i32>(
9378 k: __mmask16,
9379 a: __m512,
9380 b: __m512,
9381 c: __m512,
9382) -> __m512 {
9383 unsafe {
9384 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
9387 }
9388}
9389
9390/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9391///
9392/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9393/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9394/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9395/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9396/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9397/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9398///
9399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_ps&expand=2733)
9400#[inline]
9401#[target_feature(enable = "avx512f")]
9402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9403#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9404#[rustc_legacy_const_generics(4)]
9405pub fn _mm512_mask3_fnmadd_round_ps<const ROUNDING: i32>(
9406 a: __m512,
9407 b: __m512,
9408 c: __m512,
9409 k: __mmask16,
9410) -> __m512 {
9411 unsafe {
9412 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, c)
9415 }
9416}
9417
9418/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9419///
9420/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9421/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9422/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9423/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9424/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9425/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9426///
9427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_pd&expand=2711)
9428#[inline]
9429#[target_feature(enable = "avx512f")]
9430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9431#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9432#[rustc_legacy_const_generics(3)]
9433pub fn _mm512_fnmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9434 unsafe {
9435 static_assert_rounding!(ROUNDING);
        vfmadd132pdround(simd_neg(a), b, c, ROUNDING)
9437 }
9438}
9439
9440/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9441///
9442/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9443/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9444/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9445/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9446/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9447/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9448///
9449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_pd&expand=2728)
9450#[inline]
9451#[target_feature(enable = "avx512f")]
9452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9453#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9454#[rustc_legacy_const_generics(4)]
9455pub fn _mm512_mask_fnmadd_round_pd<const ROUNDING: i32>(
9456 a: __m512d,
9457 k: __mmask8,
9458 b: __m512d,
9459 c: __m512d,
9460) -> __m512d {
9461 unsafe {
9462 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, a)
9465 }
9466}
9467
9468/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9469///
9470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9471/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9472/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9473/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9474/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9476///
9477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_pd&expand=2730)
9478#[inline]
9479#[target_feature(enable = "avx512f")]
9480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9481#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9482#[rustc_legacy_const_generics(4)]
9483pub fn _mm512_maskz_fnmadd_round_pd<const ROUNDING: i32>(
9484 k: __mmask8,
9485 a: __m512d,
9486 b: __m512d,
9487 c: __m512d,
9488) -> __m512d {
9489 unsafe {
9490 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
9493 }
9494}
9495
9496/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9497///
9498/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9499/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9500/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9501/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9502/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9503/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9504///
9505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_pd&expand=2729)
9506#[inline]
9507#[target_feature(enable = "avx512f")]
9508#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9509#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9510#[rustc_legacy_const_generics(4)]
9511pub fn _mm512_mask3_fnmadd_round_pd<const ROUNDING: i32>(
9512 a: __m512d,
9513 b: __m512d,
9514 c: __m512d,
9515 k: __mmask8,
9516) -> __m512d {
9517 unsafe {
9518 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, c)
9521 }
9522}
9523
9524/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9525///
9526/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9527/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9528/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9529/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9530/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9531/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9532///
9533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_ps&expand=2779)
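///
/// A small sketch of the negated-multiply-subtract behaviour (illustrative
/// helper and inputs; assumes `avx512f` has been detected at runtime):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     let r = _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     // Every lane is -(2.0 * 3.0) - 1.0 == -7.0.
///     assert!(out.iter().all(|&x| x == -7.0));
/// }
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe { demo() }; // SAFETY: `avx512f` verified at runtime.
/// }
/// ```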
9534#[inline]
9535#[target_feature(enable = "avx512f")]
9536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9537#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9538#[rustc_legacy_const_generics(3)]
9539pub fn _mm512_fnmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9540 unsafe {
9541 static_assert_rounding!(ROUNDING);
        vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING)
9543 }
9544}
9545
9546/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9547///
9548/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9549/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9550/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9551/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9552/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9553/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9554///
9555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_ps&expand=2780)
9556#[inline]
9557#[target_feature(enable = "avx512f")]
9558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9559#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9560#[rustc_legacy_const_generics(4)]
9561pub fn _mm512_mask_fnmsub_round_ps<const ROUNDING: i32>(
9562 a: __m512,
9563 k: __mmask16,
9564 b: __m512,
9565 c: __m512,
9566) -> __m512 {
9567 unsafe {
9568 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
9571 }
9572}
9573
9574/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9575///
9576/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9577/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9578/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9579/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9580/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9581/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9582///
9583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_ps&expand=2782)
9584#[inline]
9585#[target_feature(enable = "avx512f")]
9586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9587#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9588#[rustc_legacy_const_generics(4)]
9589pub fn _mm512_maskz_fnmsub_round_ps<const ROUNDING: i32>(
9590 k: __mmask16,
9591 a: __m512,
9592 b: __m512,
9593 c: __m512,
9594) -> __m512 {
9595 unsafe {
9596 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
9599 }
9600}
9601
9602/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9603///
9604/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9605/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9606/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9607/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9608/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9609/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9610///
9611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_ps&expand=2781)
9612#[inline]
9613#[target_feature(enable = "avx512f")]
9614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9615#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9616#[rustc_legacy_const_generics(4)]
9617pub fn _mm512_mask3_fnmsub_round_ps<const ROUNDING: i32>(
9618 a: __m512,
9619 b: __m512,
9620 c: __m512,
9621 k: __mmask16,
9622) -> __m512 {
9623 unsafe {
9624 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
9627 }
9628}
9629
9630/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9631///
9632/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9633/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9634/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9635/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9636/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9637/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9638///
9639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_pd&expand=2775)
9640#[inline]
9641#[target_feature(enable = "avx512f")]
9642#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9643#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9644#[rustc_legacy_const_generics(3)]
9645pub fn _mm512_fnmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9646 unsafe {
9647 static_assert_rounding!(ROUNDING);
        vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING)
9649 }
9650}
9651
9652/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9653///
9654/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9655/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9656/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9657/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9658/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9659/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9660///
9661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_pd&expand=2776)
9662#[inline]
9663#[target_feature(enable = "avx512f")]
9664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9665#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9666#[rustc_legacy_const_generics(4)]
9667pub fn _mm512_mask_fnmsub_round_pd<const ROUNDING: i32>(
9668 a: __m512d,
9669 k: __mmask8,
9670 b: __m512d,
9671 c: __m512d,
9672) -> __m512d {
9673 unsafe {
9674 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
9677 }
9678}
9679
9680/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9681///
9682/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9683/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9684/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9685/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9686/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9687/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9688///
9689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_pd&expand=2778)
9690#[inline]
9691#[target_feature(enable = "avx512f")]
9692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9693#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9694#[rustc_legacy_const_generics(4)]
9695pub fn _mm512_maskz_fnmsub_round_pd<const ROUNDING: i32>(
9696 k: __mmask8,
9697 a: __m512d,
9698 b: __m512d,
9699 c: __m512d,
9700) -> __m512d {
9701 unsafe {
9702 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
9705 }
9706}
9707
9708/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9709///
9710/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9711/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9712/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9713/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9714/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9715/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9716///
9717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_pd&expand=2777)
9718#[inline]
9719#[target_feature(enable = "avx512f")]
9720#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9721#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9722#[rustc_legacy_const_generics(4)]
9723pub fn _mm512_mask3_fnmsub_round_pd<const ROUNDING: i32>(
9724 a: __m512d,
9725 b: __m512d,
9726 c: __m512d,
9727 k: __mmask8,
9728) -> __m512d {
9729 unsafe {
9730 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
9733 }
9734}
9735
9736/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\
9737/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9738///
9739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_ps&expand=3662)
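///
/// An illustrative sketch (helper and inputs are made up; assumes `avx512f`
/// support has been verified at runtime):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(-1.5);
///     let b = _mm512_set1_ps(0.25);
///     let r = _mm512_max_round_ps::<{ _MM_FROUND_NO_EXC }>(a, b);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     // Every lane holds the larger operand, 0.25.
///     assert!(out.iter().all(|&x| x == 0.25));
/// }
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe { demo() }; // SAFETY: `avx512f` verified at runtime.
/// }
/// ```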
9740#[inline]
9741#[target_feature(enable = "avx512f")]
9742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9743#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9744#[rustc_legacy_const_generics(2)]
9745pub fn _mm512_max_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9746 unsafe {
9747 static_assert_sae!(SAE);
9748 let a: f32x16 = a.as_f32x16();
9749 let b: f32x16 = b.as_f32x16();
9750 let r: f32x16 = vmaxps(a, b, SAE);
        transmute(r)
9752 }
9753}
9754
9755/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9756/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9757///
9758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_ps&expand=3660)
9759#[inline]
9760#[target_feature(enable = "avx512f")]
9761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9762#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9763#[rustc_legacy_const_generics(4)]
9764pub fn _mm512_mask_max_round_ps<const SAE: i32>(
9765 src: __m512,
9766 k: __mmask16,
9767 a: __m512,
9768 b: __m512,
9769) -> __m512 {
9770 unsafe {
9771 static_assert_sae!(SAE);
9772 let a: f32x16 = a.as_f32x16();
9773 let b: f32x16 = b.as_f32x16();
9774 let r: f32x16 = vmaxps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
9776 }
9777}
9778
9779/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9780/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9781///
9782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_ps&expand=3661)
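///
/// A sketch of the zeroing behaviour (illustrative helper and values; assumes
/// `avx512f` has been verified at runtime):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(2.0);
///     // Only lane 0 is computed; all other lanes are zeroed.
///     let r = _mm512_maskz_max_round_ps::<{ _MM_FROUND_NO_EXC }>(0b1, a, b);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     assert_eq!(out[0], 2.0);
///     assert_eq!(out[1], 0.0);
/// }
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe { demo() }; // SAFETY: `avx512f` verified at runtime.
/// }
/// ```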
9783#[inline]
9784#[target_feature(enable = "avx512f")]
9785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9786#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9787#[rustc_legacy_const_generics(3)]
9788pub fn _mm512_maskz_max_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
9789 unsafe {
9790 static_assert_sae!(SAE);
9791 let a: f32x16 = a.as_f32x16();
9792 let b: f32x16 = b.as_f32x16();
9793 let r: f32x16 = vmaxps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
9795 }
9796}
9797
9798/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\
9799/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9800///
9801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_pd&expand=3659)
9802#[inline]
9803#[target_feature(enable = "avx512f")]
9804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9805#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9806#[rustc_legacy_const_generics(2)]
9807pub fn _mm512_max_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
9808 unsafe {
9809 static_assert_sae!(SAE);
9810 let a: f64x8 = a.as_f64x8();
9811 let b: f64x8 = b.as_f64x8();
9812 let r: f64x8 = vmaxpd(a, b, SAE);
        transmute(r)
9814 }
9815}
9816
9817/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9818/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9819///
9820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_pd&expand=3657)
9821#[inline]
9822#[target_feature(enable = "avx512f")]
9823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9824#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9825#[rustc_legacy_const_generics(4)]
9826pub fn _mm512_mask_max_round_pd<const SAE: i32>(
9827 src: __m512d,
9828 k: __mmask8,
9829 a: __m512d,
9830 b: __m512d,
9831) -> __m512d {
9832 unsafe {
9833 static_assert_sae!(SAE);
9834 let a: f64x8 = a.as_f64x8();
9835 let b: f64x8 = b.as_f64x8();
9836 let r: f64x8 = vmaxpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
9838 }
9839}
9840
9841/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9842/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9843///
9844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_pd&expand=3658)
9845#[inline]
9846#[target_feature(enable = "avx512f")]
9847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9848#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9849#[rustc_legacy_const_generics(3)]
9850pub fn _mm512_maskz_max_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
9851 unsafe {
9852 static_assert_sae!(SAE);
9853 let a: f64x8 = a.as_f64x8();
9854 let b: f64x8 = b.as_f64x8();
9855 let r: f64x8 = vmaxpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
9857 }
9858}
9859
9860/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9861/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9862///
9863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_ps&expand=3776)
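///
/// An illustrative sketch (helper and inputs are made up; assumes `avx512f`
/// support has been verified at runtime):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(-1.5);
///     let b = _mm512_set1_ps(0.25);
///     let r = _mm512_min_round_ps::<{ _MM_FROUND_NO_EXC }>(a, b);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     // Every lane holds the smaller operand, -1.5.
///     assert!(out.iter().all(|&x| x == -1.5));
/// }
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe { demo() }; // SAFETY: `avx512f` verified at runtime.
/// }
/// ```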
9864#[inline]
9865#[target_feature(enable = "avx512f")]
9866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9867#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9868#[rustc_legacy_const_generics(2)]
9869pub fn _mm512_min_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9870 unsafe {
9871 static_assert_sae!(SAE);
9872 let a: f32x16 = a.as_f32x16();
9873 let b: f32x16 = b.as_f32x16();
9874 let r: f32x16 = vminps(a, b, SAE);
        transmute(r)
9876 }
9877}
9878
9879/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9880/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9881///
9882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_ps&expand=3774)
9883#[inline]
9884#[target_feature(enable = "avx512f")]
9885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9886#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9887#[rustc_legacy_const_generics(4)]
9888pub fn _mm512_mask_min_round_ps<const SAE: i32>(
9889 src: __m512,
9890 k: __mmask16,
9891 a: __m512,
9892 b: __m512,
9893) -> __m512 {
9894 unsafe {
9895 static_assert_sae!(SAE);
9896 let a: f32x16 = a.as_f32x16();
9897 let b: f32x16 = b.as_f32x16();
9898 let r: f32x16 = vminps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
9900 }
9901}
9902
9903/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9904/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9905///
9906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_ps&expand=3775)
9907#[inline]
9908#[target_feature(enable = "avx512f")]
9909#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9910#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9911#[rustc_legacy_const_generics(3)]
9912pub fn _mm512_maskz_min_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
9913 unsafe {
9914 static_assert_sae!(SAE);
9915 let a: f32x16 = a.as_f32x16();
9916 let b: f32x16 = b.as_f32x16();
9917 let r: f32x16 = vminps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
9919 }
9920}
9921
9922/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9923/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9924///
9925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_pd&expand=3773)
9926#[inline]
9927#[target_feature(enable = "avx512f")]
9928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9929#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9930#[rustc_legacy_const_generics(2)]
9931pub fn _mm512_min_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
9932 unsafe {
9933 static_assert_sae!(SAE);
9934 let a: f64x8 = a.as_f64x8();
9935 let b: f64x8 = b.as_f64x8();
9936 let r: f64x8 = vminpd(a, b, SAE);
        transmute(r)
9938 }
9939}
9940
9941/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9942/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9943///
9944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_pd&expand=3771)
9945#[inline]
9946#[target_feature(enable = "avx512f")]
9947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9948#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9949#[rustc_legacy_const_generics(4)]
9950pub fn _mm512_mask_min_round_pd<const SAE: i32>(
9951 src: __m512d,
9952 k: __mmask8,
9953 a: __m512d,
9954 b: __m512d,
9955) -> __m512d {
9956 unsafe {
9957 static_assert_sae!(SAE);
9958 let a: f64x8 = a.as_f64x8();
9959 let b: f64x8 = b.as_f64x8();
9960 let r: f64x8 = vminpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
9962 }
9963}
9964
9965/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9966/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9967///
9968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_pd&expand=3772)
9969#[inline]
9970#[target_feature(enable = "avx512f")]
9971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9972#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9973#[rustc_legacy_const_generics(3)]
9974pub fn _mm512_maskz_min_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
9975 unsafe {
9976 static_assert_sae!(SAE);
9977 let a: f64x8 = a.as_f64x8();
9978 let b: f64x8 = b.as_f64x8();
9979 let r: f64x8 = vminpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
9981 }
9982}
9983
9984/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
9985/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9986///
9987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_ps&expand=2850)
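///
/// A minimal usage sketch (illustrative only; the wrapper name is made up):
///
/// ```
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn exponents(a: __m512) -> __m512 {
///     // A lane holding 8.0 yields 3.0, i.e. floor(log2(8.0)).
///     _mm512_getexp_round_ps::<_MM_FROUND_NO_EXC>(a)
/// }
/// ```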
9988#[inline]
9989#[target_feature(enable = "avx512f")]
9990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9991#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
9992#[rustc_legacy_const_generics(1)]
9993pub fn _mm512_getexp_round_ps<const SAE: i32>(a: __m512) -> __m512 {
9994 unsafe {
9995 static_assert_sae!(SAE);
9996 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vgetexpps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
9999 }
10000}
10001
10002/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10003/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10004///
10005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_ps&expand=2851)
10006#[inline]
10007#[target_feature(enable = "avx512f")]
10008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10009#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10010#[rustc_legacy_const_generics(3)]
10011pub fn _mm512_mask_getexp_round_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
10012 unsafe {
10013 static_assert_sae!(SAE);
10014 let a: f32x16 = a.as_f32x16();
10015 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vgetexpps(a, src, k, SAE);
        transmute(r)
10018 }
10019}
10020
10021/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10022/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10023///
10024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_ps&expand=2852)
10025#[inline]
10026#[target_feature(enable = "avx512f")]
10027#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10028#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10029#[rustc_legacy_const_generics(2)]
10030pub fn _mm512_maskz_getexp_round_ps<const SAE: i32>(k: __mmask16, a: __m512) -> __m512 {
10031 unsafe {
10032 static_assert_sae!(SAE);
10033 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vgetexpps(a, f32x16::ZERO, k, SAE);
        transmute(r)
10036 }
10037}
10038
10039/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
10040/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10041///
10042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_pd&expand=2847)
10043#[inline]
10044#[target_feature(enable = "avx512f")]
10045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10046#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10047#[rustc_legacy_const_generics(1)]
10048pub fn _mm512_getexp_round_pd<const SAE: i32>(a: __m512d) -> __m512d {
10049 unsafe {
10050 static_assert_sae!(SAE);
10051 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vgetexppd(a, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
10054 }
10055}
10056
10057/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10058/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10059///
10060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_pd&expand=2848)
10061#[inline]
10062#[target_feature(enable = "avx512f")]
10063#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10064#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10065#[rustc_legacy_const_generics(3)]
10066pub fn _mm512_mask_getexp_round_pd<const SAE: i32>(
10067 src: __m512d,
10068 k: __mmask8,
10069 a: __m512d,
10070) -> __m512d {
10071 unsafe {
10072 static_assert_sae!(SAE);
10073 let a: f64x8 = a.as_f64x8();
10074 let src: f64x8 = src.as_f64x8();
        let r: f64x8 = vgetexppd(a, src, k, SAE);
        transmute(r)
10077 }
10078}
10079
10080/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10081/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10082///
10083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_pd&expand=2849)
10084#[inline]
10085#[target_feature(enable = "avx512f")]
10086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10087#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10088#[rustc_legacy_const_generics(2)]
10089pub fn _mm512_maskz_getexp_round_pd<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512d {
10090 unsafe {
10091 static_assert_sae!(SAE);
10092 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vgetexppd(a, f64x8::ZERO, k, SAE);
        transmute(r)
10095 }
10096}
10097
10098/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10099/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10100/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10101/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10102/// * [`_MM_FROUND_TO_POS_INF`] : round up
10103/// * [`_MM_FROUND_TO_ZERO`] : truncate
10104/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10105///
10106/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_ps&expand=4790)
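///
/// A minimal usage sketch (illustrative only), showing how `IMM8` packs both the
/// precision and the rounding mode listed above:
///
/// ```
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn round_to_halves(a: __m512) -> __m512 {
///     // imm8[7:4] = 1 keeps one fraction bit (multiples of 0.5);
///     // imm8[2:0] = _MM_FROUND_TO_NEAREST_INT (0) selects round-to-nearest.
///     _mm512_roundscale_round_ps::<0x10, _MM_FROUND_NO_EXC>(a)
/// }
/// ```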
10108#[inline]
10109#[target_feature(enable = "avx512f")]
10110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10111#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10112#[rustc_legacy_const_generics(1, 2)]
10113pub fn _mm512_roundscale_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
10114 unsafe {
10115 static_assert_uimm_bits!(IMM8, 8);
10116 static_assert_mantissas_sae!(SAE);
10117 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vrndscaleps(a, IMM8, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
10120 }
10121}
10122
10123/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10124/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10125/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10126/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10127/// * [`_MM_FROUND_TO_POS_INF`] : round up
10128/// * [`_MM_FROUND_TO_ZERO`] : truncate
10129/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10130///
10131/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_ps&expand=4788)
10133#[inline]
10134#[target_feature(enable = "avx512f")]
10135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10136#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10137#[rustc_legacy_const_generics(3, 4)]
10138pub fn _mm512_mask_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10139 src: __m512,
10140 k: __mmask16,
10141 a: __m512,
10142) -> __m512 {
10143 unsafe {
10144 static_assert_uimm_bits!(IMM8, 8);
10145 static_assert_mantissas_sae!(SAE);
10146 let a: f32x16 = a.as_f32x16();
10147 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vrndscaleps(a, IMM8, src, k, SAE);
        transmute(r)
10150 }
10151}
10152
10153/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10154/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10155/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10156/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10157/// * [`_MM_FROUND_TO_POS_INF`] : round up
10158/// * [`_MM_FROUND_TO_ZERO`] : truncate
10159/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10160///
10161/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_ps&expand=4789)
10163#[inline]
10164#[target_feature(enable = "avx512f")]
10165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10166#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10167#[rustc_legacy_const_generics(2, 3)]
10168pub fn _mm512_maskz_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10169 k: __mmask16,
10170 a: __m512,
10171) -> __m512 {
10172 unsafe {
10173 static_assert_uimm_bits!(IMM8, 8);
10174 static_assert_mantissas_sae!(SAE);
10175 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vrndscaleps(a, IMM8, f32x16::ZERO, k, SAE);
        transmute(r)
10178 }
10179}
10180
10181/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10182/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10183/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10184/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10185/// * [`_MM_FROUND_TO_POS_INF`] : round up
10186/// * [`_MM_FROUND_TO_ZERO`] : truncate
10187/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10188///
10189/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_pd&expand=4787)
10191#[inline]
10192#[target_feature(enable = "avx512f")]
10193#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10194#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10195#[rustc_legacy_const_generics(1, 2)]
10196pub fn _mm512_roundscale_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
10197 unsafe {
10198 static_assert_uimm_bits!(IMM8, 8);
10199 static_assert_mantissas_sae!(SAE);
10200 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
10203 }
10204}
10205
10206/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10207/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10208/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10209/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10210/// * [`_MM_FROUND_TO_POS_INF`] : round up
10211/// * [`_MM_FROUND_TO_ZERO`] : truncate
10212/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10213///
10214/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_pd&expand=4785)
10216#[inline]
10217#[target_feature(enable = "avx512f")]
10218#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10219#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10220#[rustc_legacy_const_generics(3, 4)]
10221pub fn _mm512_mask_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10222 src: __m512d,
10223 k: __mmask8,
10224 a: __m512d,
10225) -> __m512d {
10226 unsafe {
10227 static_assert_uimm_bits!(IMM8, 8);
10228 static_assert_mantissas_sae!(SAE);
10229 let a: f64x8 = a.as_f64x8();
10230 let src: f64x8 = src.as_f64x8();
        let r: f64x8 = vrndscalepd(a, IMM8, src, k, SAE);
        transmute(r)
10233 }
10234}
10235
10236/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10237/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10238/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10239/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10240/// * [`_MM_FROUND_TO_POS_INF`] : round up
10241/// * [`_MM_FROUND_TO_ZERO`] : truncate
10242/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10243///
10244/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_pd&expand=4786)
10246#[inline]
10247#[target_feature(enable = "avx512f")]
10248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10249#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10250#[rustc_legacy_const_generics(2, 3)]
10251pub fn _mm512_maskz_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10252 k: __mmask8,
10253 a: __m512d,
10254) -> __m512d {
10255 unsafe {
10256 static_assert_uimm_bits!(IMM8, 8);
10257 static_assert_mantissas_sae!(SAE);
10258 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vrndscalepd(a, IMM8, f64x8::ZERO, k, SAE);
        transmute(r)
10261 }
10262}
10263
10264/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.\
10265///
10266/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10267/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10268/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10269/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10270/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10271/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10272///
10273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_ps&expand=4889)
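///
/// A minimal usage sketch (illustrative only); `scalef` computes `a * 2^floor(b)`
/// per lane:
///
/// ```
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn scale_by_pow2(a: __m512, b: __m512) -> __m512 {
///     // Rounding mode and exception suppression are OR'ed into one constant.
///     _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b)
/// }
/// ```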
10274#[inline]
10275#[target_feature(enable = "avx512f")]
10276#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10277#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10278#[rustc_legacy_const_generics(2)]
10279pub fn _mm512_scalef_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
10280 unsafe {
10281 static_assert_rounding!(ROUNDING);
10282 let a: f32x16 = a.as_f32x16();
10283 let b: f32x16 = b.as_f32x16();
        let r: f32x16 = vscalefps(a, b, f32x16::ZERO, 0b11111111_11111111, ROUNDING);
        transmute(r)
10286 }
10287}
10288
10289/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10290///
10291/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10292/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10293/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10294/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10295/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10296/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10297///
10298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_ps&expand=4887)
10299#[inline]
10300#[target_feature(enable = "avx512f")]
10301#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10302#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10303#[rustc_legacy_const_generics(4)]
10304pub fn _mm512_mask_scalef_round_ps<const ROUNDING: i32>(
10305 src: __m512,
10306 k: __mmask16,
10307 a: __m512,
10308 b: __m512,
10309) -> __m512 {
10310 unsafe {
10311 static_assert_rounding!(ROUNDING);
10312 let a: f32x16 = a.as_f32x16();
10313 let b: f32x16 = b.as_f32x16();
10314 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vscalefps(a, b, src, k, ROUNDING);
        transmute(r)
10317 }
10318}
10319
10320/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10321///
10322/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10323/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10324/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10325/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10326/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10327/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10328///
10329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_ps&expand=4888)
10330#[inline]
10331#[target_feature(enable = "avx512f")]
10332#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10333#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10334#[rustc_legacy_const_generics(3)]
10335pub fn _mm512_maskz_scalef_round_ps<const ROUNDING: i32>(
10336 k: __mmask16,
10337 a: __m512,
10338 b: __m512,
10339) -> __m512 {
10340 unsafe {
10341 static_assert_rounding!(ROUNDING);
10342 let a: f32x16 = a.as_f32x16();
10343 let b: f32x16 = b.as_f32x16();
        let r: f32x16 = vscalefps(a, b, f32x16::ZERO, k, ROUNDING);
        transmute(r)
10346 }
10347}
10348
10349/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.\
10350///
10351/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10352/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10353/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10354/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10355/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10356/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10357///
10358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_pd&expand=4886)
10359#[inline]
10360#[target_feature(enable = "avx512f")]
10361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10362#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10363#[rustc_legacy_const_generics(2)]
10364pub fn _mm512_scalef_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
10365 unsafe {
10366 static_assert_rounding!(ROUNDING);
10367 let a: f64x8 = a.as_f64x8();
10368 let b: f64x8 = b.as_f64x8();
        let r: f64x8 = vscalefpd(a, b, f64x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
10371 }
10372}
10373
10374/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10375///
10376/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10377/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10378/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10379/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10380/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10381/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10382///
10383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_pd&expand=4884)
10384#[inline]
10385#[target_feature(enable = "avx512f")]
10386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10387#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10388#[rustc_legacy_const_generics(4)]
10389pub fn _mm512_mask_scalef_round_pd<const ROUNDING: i32>(
10390 src: __m512d,
10391 k: __mmask8,
10392 a: __m512d,
10393 b: __m512d,
10394) -> __m512d {
10395 unsafe {
10396 static_assert_rounding!(ROUNDING);
10397 let a: f64x8 = a.as_f64x8();
10398 let b: f64x8 = b.as_f64x8();
10399 let src: f64x8 = src.as_f64x8();
        let r: f64x8 = vscalefpd(a, b, src, k, ROUNDING);
        transmute(r)
10402 }
10403}
10404
10405/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10406///
10407/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10408/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10409/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10410/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10411/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10412/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10413///
10414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_pd&expand=4885)
10415#[inline]
10416#[target_feature(enable = "avx512f")]
10417#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10418#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10419#[rustc_legacy_const_generics(3)]
10420pub fn _mm512_maskz_scalef_round_pd<const ROUNDING: i32>(
10421 k: __mmask8,
10422 a: __m512d,
10423 b: __m512d,
10424) -> __m512d {
10425 unsafe {
10426 static_assert_rounding!(ROUNDING);
10427 let a: f64x8 = a.as_f64x8();
10428 let b: f64x8 = b.as_f64x8();
        let r: f64x8 = vscalefpd(a, b, f64x8::ZERO, k, ROUNDING);
        transmute(r)
10431 }
10432}
10433
10434/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10435///
10436/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_ps&expand=2505)
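///
/// A minimal call sketch (illustrative only; the fix-up table contents are left to
/// the caller, and `IMM8 = 0` is assumed to request no additional flag reporting):
///
/// ```
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn fixup(a: __m512, b: __m512, table: __m512i) -> __m512 {
///     // Each 32-bit lane of `table` holds the per-class response tokens.
///     _mm512_fixupimm_round_ps::<0, _MM_FROUND_NO_EXC>(a, b, table)
/// }
/// ```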
10438#[inline]
10439#[target_feature(enable = "avx512f")]
10440#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10441#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10442#[rustc_legacy_const_generics(3, 4)]
10443pub fn _mm512_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10444 a: __m512,
10445 b: __m512,
10446 c: __m512i,
10447) -> __m512 {
10448 unsafe {
10449 static_assert_uimm_bits!(IMM8, 8);
10450 static_assert_mantissas_sae!(SAE);
10451 let a: f32x16 = a.as_f32x16();
10452 let b: f32x16 = b.as_f32x16();
10453 let c: i32x16 = c.as_i32x16();
        let r: f32x16 = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, SAE);
        transmute(r)
10456 }
10457}
10458
10459/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10460///
10461/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_ps&expand=2506)
10463#[inline]
10464#[target_feature(enable = "avx512f")]
10465#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10466#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10467#[rustc_legacy_const_generics(4, 5)]
10468pub fn _mm512_mask_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10469 a: __m512,
10470 k: __mmask16,
10471 b: __m512,
10472 c: __m512i,
10473) -> __m512 {
10474 unsafe {
10475 static_assert_uimm_bits!(IMM8, 8);
10476 static_assert_mantissas_sae!(SAE);
10477 let a: f32x16 = a.as_f32x16();
10478 let b: f32x16 = b.as_f32x16();
10479 let c: i32x16 = c.as_i32x16();
        let r: f32x16 = vfixupimmps(a, b, c, IMM8, k, SAE);
        transmute(r)
10482 }
10483}
10484
10485/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10486///
10487/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_ps&expand=2507)
10489#[inline]
10490#[target_feature(enable = "avx512f")]
10491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10492#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10493#[rustc_legacy_const_generics(4, 5)]
10494pub fn _mm512_maskz_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10495 k: __mmask16,
10496 a: __m512,
10497 b: __m512,
10498 c: __m512i,
10499) -> __m512 {
10500 unsafe {
10501 static_assert_uimm_bits!(IMM8, 8);
10502 static_assert_mantissas_sae!(SAE);
10503 let a: f32x16 = a.as_f32x16();
10504 let b: f32x16 = b.as_f32x16();
10505 let c: i32x16 = c.as_i32x16();
        let r: f32x16 = vfixupimmpsz(a, b, c, IMM8, k, SAE);
        transmute(r)
10508 }
10509}
10510
10511/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10512///
10513/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_pd&expand=2502)
10515#[inline]
10516#[target_feature(enable = "avx512f")]
10517#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10518#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10519#[rustc_legacy_const_generics(3, 4)]
10520pub fn _mm512_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10521 a: __m512d,
10522 b: __m512d,
10523 c: __m512i,
10524) -> __m512d {
10525 unsafe {
10526 static_assert_uimm_bits!(IMM8, 8);
10527 static_assert_mantissas_sae!(SAE);
10528 let a: f64x8 = a.as_f64x8();
10529 let b: f64x8 = b.as_f64x8();
10530 let c: i64x8 = c.as_i64x8();
        let r: f64x8 = vfixupimmpd(a, b, c, IMM8, 0b11111111, SAE);
        transmute(r)
10533 }
10534}
10535
10536/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10537///
10538/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_pd&expand=2503)
10540#[inline]
10541#[target_feature(enable = "avx512f")]
10542#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10543#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10544#[rustc_legacy_const_generics(4, 5)]
10545pub fn _mm512_mask_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10546 a: __m512d,
10547 k: __mmask8,
10548 b: __m512d,
10549 c: __m512i,
10550) -> __m512d {
10551 unsafe {
10552 static_assert_uimm_bits!(IMM8, 8);
10553 static_assert_mantissas_sae!(SAE);
10554 let a: f64x8 = a.as_f64x8();
10555 let b: f64x8 = b.as_f64x8();
10556 let c: i64x8 = c.as_i64x8();
        let r: f64x8 = vfixupimmpd(a, b, c, IMM8, k, SAE);
        transmute(r)
10559 }
10560}
10561
10562/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10563///
10564/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_pd&expand=2504)
10566#[inline]
10567#[target_feature(enable = "avx512f")]
10568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10569#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10570#[rustc_legacy_const_generics(4, 5)]
10571pub fn _mm512_maskz_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10572 k: __mmask8,
10573 a: __m512d,
10574 b: __m512d,
10575 c: __m512i,
10576) -> __m512d {
10577 unsafe {
10578 static_assert_uimm_bits!(IMM8, 8);
10579 static_assert_mantissas_sae!(SAE);
10580 let a: f64x8 = a.as_f64x8();
10581 let b: f64x8 = b.as_f64x8();
10582 let c: i64x8 = c.as_i64x8();
        let r: f64x8 = vfixupimmpdz(a, b, c, IMM8, k, SAE);
        transmute(r)
10585 }
10586}
10587
10588/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10589/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10590/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10591/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10592/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10593/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10594/// The sign is determined by sc which can take the following values:\
10595/// _MM_MANT_SIGN_src // sign = sign(src)\
10596/// _MM_MANT_SIGN_zero // sign = 0\
10597/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10598/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10599///
10600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_ps&expand=2886)
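///
/// A minimal usage sketch (illustrative only), passing the raw encodings
/// `NORM = 0` (`_MM_MANT_NORM_1_2`, interval [1, 2)) and `SIGN = 0`
/// (`_MM_MANT_SIGN_src`, keep the source sign):
///
/// ```
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn mantissas(a: __m512) -> __m512 {
///     // A lane holding 12.0 becomes 1.5 (12.0 = 1.5 * 2^3).
///     _mm512_getmant_round_ps::<0, 0, _MM_FROUND_NO_EXC>(a)
/// }
/// ```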
10601#[inline]
10602#[target_feature(enable = "avx512f")]
10603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10604#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10605#[rustc_legacy_const_generics(1, 2, 3)]
10606pub fn _mm512_getmant_round_ps<
10607 const NORM: _MM_MANTISSA_NORM_ENUM,
10608 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10609 const SAE: i32,
10610>(
10611 a: __m512,
10612) -> __m512 {
10613 unsafe {
10614 static_assert_uimm_bits!(NORM, 4);
10615 static_assert_uimm_bits!(SIGN, 2);
10616 static_assert_mantissas_sae!(SAE);
10617 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
10620 }
10621}
10622
10623/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10624/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10625/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10626/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10627/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10628/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10629/// The sign is determined by sc which can take the following values:\
10630/// _MM_MANT_SIGN_src // sign = sign(src)\
10631/// _MM_MANT_SIGN_zero // sign = 0\
10632/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10633/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10634///
10635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_ps&expand=2887)
10636#[inline]
10637#[target_feature(enable = "avx512f")]
10638#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10639#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10640#[rustc_legacy_const_generics(3, 4, 5)]
10641pub fn _mm512_mask_getmant_round_ps<
10642 const NORM: _MM_MANTISSA_NORM_ENUM,
10643 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10644 const SAE: i32,
10645>(
10646 src: __m512,
10647 k: __mmask16,
10648 a: __m512,
10649) -> __m512 {
10650 unsafe {
10651 static_assert_uimm_bits!(NORM, 4);
10652 static_assert_uimm_bits!(SIGN, 2);
10653 static_assert_mantissas_sae!(SAE);
10654 let a: f32x16 = a.as_f32x16();
10655 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
10658 }
10659}
10660
10661/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10662/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10663/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10664/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10665/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10666/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10667/// The sign is determined by sc which can take the following values:\
10668/// _MM_MANT_SIGN_src // sign = sign(src)\
10669/// _MM_MANT_SIGN_zero // sign = 0\
10670/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10671/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10672///
10673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_ps&expand=2888)
10674#[inline]
10675#[target_feature(enable = "avx512f")]
10676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10677#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10678#[rustc_legacy_const_generics(2, 3, 4)]
10679pub fn _mm512_maskz_getmant_round_ps<
10680 const NORM: _MM_MANTISSA_NORM_ENUM,
10681 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10682 const SAE: i32,
10683>(
10684 k: __mmask16,
10685 a: __m512,
10686) -> __m512 {
10687 unsafe {
10688 static_assert_uimm_bits!(NORM, 4);
10689 static_assert_uimm_bits!(SIGN, 2);
10690 static_assert_mantissas_sae!(SAE);
10691 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, k, SAE);
        transmute(r)
10694 }
10695}
10696
10697/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10698/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10699/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10700/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10701/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10702/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10703/// The sign is determined by sc which can take the following values:\
10704/// _MM_MANT_SIGN_src // sign = sign(src)\
10705/// _MM_MANT_SIGN_zero // sign = 0\
10706/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10707/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10708///
10709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_pd&expand=2883)
10710#[inline]
10711#[target_feature(enable = "avx512f")]
10712#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10713#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10714#[rustc_legacy_const_generics(1, 2, 3)]
10715pub fn _mm512_getmant_round_pd<
10716 const NORM: _MM_MANTISSA_NORM_ENUM,
10717 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10718 const SAE: i32,
10719>(
10720 a: __m512d,
10721) -> __m512d {
10722 unsafe {
10723 static_assert_uimm_bits!(NORM, 4);
10724 static_assert_uimm_bits!(SIGN, 2);
10725 static_assert_mantissas_sae!(SAE);
10726 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
10729 }
10730}
10731
10732/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10733/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10734/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10735/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10736/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10737/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10738/// The sign is determined by sc which can take the following values:\
10739/// _MM_MANT_SIGN_src // sign = sign(src)\
10740/// _MM_MANT_SIGN_zero // sign = 0\
10741/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10742/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10743///
10744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_pd&expand=2884)
10745#[inline]
10746#[target_feature(enable = "avx512f")]
10747#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10748#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10749#[rustc_legacy_const_generics(3, 4, 5)]
10750pub fn _mm512_mask_getmant_round_pd<
10751 const NORM: _MM_MANTISSA_NORM_ENUM,
10752 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10753 const SAE: i32,
10754>(
10755 src: __m512d,
10756 k: __mmask8,
10757 a: __m512d,
10758) -> __m512d {
10759 unsafe {
10760 static_assert_uimm_bits!(NORM, 4);
10761 static_assert_uimm_bits!(SIGN, 2);
10762 static_assert_mantissas_sae!(SAE);
10763 let a: f64x8 = a.as_f64x8();
10764 let src: f64x8 = src.as_f64x8();
        let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
10767 }
10768}
10769
10770/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10771/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10772/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10773/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10774/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10775/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10776/// The sign is determined by sc which can take the following values:\
10777/// _MM_MANT_SIGN_src // sign = sign(src)\
10778/// _MM_MANT_SIGN_zero // sign = 0\
10779/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10780/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10781///
10782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_pd&expand=2885)
10783#[inline]
10784#[target_feature(enable = "avx512f")]
10785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10786#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10787#[rustc_legacy_const_generics(2, 3, 4)]
10788pub fn _mm512_maskz_getmant_round_pd<
10789 const NORM: _MM_MANTISSA_NORM_ENUM,
10790 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10791 const SAE: i32,
10792>(
10793 k: __mmask8,
10794 a: __m512d,
10795) -> __m512d {
10796 unsafe {
10797 static_assert_uimm_bits!(NORM, 4);
10798 static_assert_uimm_bits!(SIGN, 2);
10799 static_assert_mantissas_sae!(SAE);
10800 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, k, SAE);
        transmute(r)
10803 }
10804}
10805
10806/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
10807///
10808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi32&expand=1737)
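///
/// A minimal usage sketch (illustrative only; the wrapper name is made up):
///
/// ```
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn to_i32(a: __m512) -> __m512i {
///     // Uses the current MXCSR rounding mode; with the default
///     // round-to-nearest-even, a lane holding 2.5 becomes 2.
///     _mm512_cvtps_epi32(a)
/// }
/// ```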
10809#[inline]
10810#[target_feature(enable = "avx512f")]
10811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10812#[cfg_attr(test, assert_instr(vcvtps2dq))]
10813pub fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
10814 unsafe {
        transmute(vcvtps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
10821 }
10822}
10823
10824/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10825///
10826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi32&expand=1738)
10827#[inline]
10828#[target_feature(enable = "avx512f")]
10829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10830#[cfg_attr(test, assert_instr(vcvtps2dq))]
10831pub fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
10832 unsafe {
        transmute(vcvtps2dq(
            a.as_f32x16(),
            src.as_i32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
10839 }
10840}
10841
10842/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10843///
10844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi32&expand=1739)
10845#[inline]
10846#[target_feature(enable = "avx512f")]
10847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10848#[cfg_attr(test, assert_instr(vcvtps2dq))]
10849pub fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
10850 unsafe {
        transmute(vcvtps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
10857 }
10858}
10859
10860/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10861///
10862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi32&expand=1735)
10863#[inline]
10864#[target_feature(enable = "avx512f,avx512vl")]
10865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10866#[cfg_attr(test, assert_instr(vcvtps2dq))]
10867pub fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
10868 unsafe {
10869 let convert: __m256i = _mm256_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8()))
10871 }
10872}
10873
10874/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10875///
10876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi32&expand=1736)
10877#[inline]
10878#[target_feature(enable = "avx512f,avx512vl")]
10879#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10880#[cfg_attr(test, assert_instr(vcvtps2dq))]
10881pub fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i {
10882 unsafe {
10883 let convert: __m256i = _mm256_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x8(), i32x8::ZERO))
10885 }
10886}
10887
10888/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10889///
10890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi32&expand=1732)
10891#[inline]
10892#[target_feature(enable = "avx512f,avx512vl")]
10893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10894#[cfg_attr(test, assert_instr(vcvtps2dq))]
10895pub fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
10896 unsafe {
10897 let convert: __m128i = _mm_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
10899 }
10900}
10901
10902/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10903///
10904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi32&expand=1733)
10905#[inline]
10906#[target_feature(enable = "avx512f,avx512vl")]
10907#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10908#[cfg_attr(test, assert_instr(vcvtps2dq))]
10909pub fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i {
10910 unsafe {
10911 let convert: __m128i = _mm_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
10913 }
10914}
10915
10916/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10917///
10918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu32&expand=1755)
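///
/// A minimal usage sketch (illustrative only); the unsigned lanes come back as the
/// bit pattern of a `__m512i`:
///
/// ```
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn to_u32(a: __m512) -> __m512i {
///     // Negative or out-of-range inputs are not representable as u32 and
///     // produce the integer-indefinite sentinel value instead.
///     _mm512_cvtps_epu32(a)
/// }
/// ```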
10919#[inline]
10920#[target_feature(enable = "avx512f")]
10921#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10922#[cfg_attr(test, assert_instr(vcvtps2udq))]
10923pub fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
10924 unsafe {
        transmute(vcvtps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
10931 }
10932}
10933
10934/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10935///
10936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu32&expand=1756)
10937#[inline]
10938#[target_feature(enable = "avx512f")]
10939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10940#[cfg_attr(test, assert_instr(vcvtps2udq))]
10941pub fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
10942 unsafe {
        transmute(vcvtps2udq(
            a.as_f32x16(),
            src.as_u32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
10949 }
10950}
10951
10952/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10953///
10954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu32&expand=1343)
10955#[inline]
10956#[target_feature(enable = "avx512f")]
10957#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10958#[cfg_attr(test, assert_instr(vcvtps2udq))]
10959pub fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
10960 unsafe {
        transmute(vcvtps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
10967 }
10968}
10969
10970/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10971///
10972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu32&expand=1752)
10973#[inline]
10974#[target_feature(enable = "avx512f,avx512vl")]
10975#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10976#[cfg_attr(test, assert_instr(vcvtps2udq))]
10977pub fn _mm256_cvtps_epu32(a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
10979}
10980
10981/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10982///
10983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu32&expand=1753)
10984#[inline]
10985#[target_feature(enable = "avx512f,avx512vl")]
10986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10987#[cfg_attr(test, assert_instr(vcvtps2udq))]
10988pub fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
10990}
10991
10992/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10993///
10994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu32&expand=1754)
10995#[inline]
10996#[target_feature(enable = "avx512f,avx512vl")]
10997#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10998#[cfg_attr(test, assert_instr(vcvtps2udq))]
10999pub fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
11001}
11002
11003/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11004///
11005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu32&expand=1749)
11006#[inline]
11007#[target_feature(enable = "avx512f,avx512vl")]
11008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11009#[cfg_attr(test, assert_instr(vcvtps2udq))]
11010pub fn _mm_cvtps_epu32(a: __m128) -> __m128i {
    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
11012}
11013
11014/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11015///
11016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu32&expand=1750)
11017#[inline]
11018#[target_feature(enable = "avx512f,avx512vl")]
11019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11020#[cfg_attr(test, assert_instr(vcvtps2udq))]
11021pub fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
11023}
11024
11025/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11026///
11027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu32&expand=1751)
11028#[inline]
11029#[target_feature(enable = "avx512f,avx512vl")]
11030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11031#[cfg_attr(test, assert_instr(vcvtps2udq))]
11032pub fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
11034}
11035
11036/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
11037///
11038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_pd&expand=1769)
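///
/// A minimal usage sketch (illustrative only; the wrapper name is made up):
///
/// ```
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn widen(a: __m256) -> __m512d {
///     // Every f32 value is exactly representable as f64, so no rounding occurs.
///     _mm512_cvtps_pd(a)
/// }
/// ```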
11039#[inline]
11040#[target_feature(enable = "avx512f")]
11041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11042#[cfg_attr(test, assert_instr(vcvtps2pd))]
11043pub fn _mm512_cvtps_pd(a: __m256) -> __m512d {
11044 unsafe {
        transmute(vcvtps2pd(
            a.as_f32x8(),
            f64x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
11051 }
11052}
11053
11054/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11055///
11056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_pd&expand=1770)
11057#[inline]
11058#[target_feature(enable = "avx512f")]
11059#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11060#[cfg_attr(test, assert_instr(vcvtps2pd))]
11061pub fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
11062 unsafe {
        transmute(vcvtps2pd(
            a.as_f32x8(),
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
11069 }
11070}
11071
11072/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11073///
11074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_pd&expand=1771)
11075#[inline]
11076#[target_feature(enable = "avx512f")]
11077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11078#[cfg_attr(test, assert_instr(vcvtps2pd))]
11079pub fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
11080 unsafe {
        transmute(vcvtps2pd(
            a.as_f32x8(),
            f64x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
11087 }
11088}
11089
11090/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
11091///
11092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpslo_pd&expand=1784)
11093#[inline]
11094#[target_feature(enable = "avx512f")]
11095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11096#[cfg_attr(test, assert_instr(vcvtps2pd))]
11097pub fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
11098 unsafe {
        transmute(vcvtps2pd(
            _mm512_castps512_ps256(v2).as_f32x8(),
            f64x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
11105 }
11106}
11107
11108/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11109///
11110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpslo_pd&expand=1785)
11111#[inline]
11112#[target_feature(enable = "avx512f")]
11113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11114#[cfg_attr(test, assert_instr(vcvtps2pd))]
11115pub fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
11116 unsafe {
        transmute(vcvtps2pd(
            _mm512_castps512_ps256(v2).as_f32x8(),
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
11123 }
11124}
11125
11126/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
11127///
11128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_ps&expand=1712)
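///
/// # Examples
///
/// A minimal sketch (not in the original source), assuming AVX-512F is
/// available at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected above.
///         unsafe {
///             let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
///             let r = _mm512_cvtpd_ps(a);
///             let mut out = [0.0f32; 8];
///             _mm256_storeu_ps(out.as_mut_ptr(), r);
///             assert_eq!(out, [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]);
///         }
///     }
/// }
/// ```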
11129#[inline]
11130#[target_feature(enable = "avx512f")]
11131#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11132#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11133pub fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
11134 unsafe {
        transmute(vcvtpd2ps(
            a.as_f64x8(),
            f32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
11141 }
11142}
11143
11144/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11145///
11146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_ps&expand=1713)
11147#[inline]
11148#[target_feature(enable = "avx512f")]
11149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11150#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11151pub fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
11152 unsafe {
        transmute(vcvtpd2ps(
            a.as_f64x8(),
            src.as_f32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
11159 }
11160}
11161
11162/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11163///
11164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_ps&expand=1714)
11165#[inline]
11166#[target_feature(enable = "avx512f")]
11167#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11168#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11169pub fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
11170 unsafe {
        transmute(vcvtpd2ps(
            a.as_f64x8(),
            f32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
11177 }
11178}
11179
11180/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11181///
11182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_ps&expand=1710)
11183#[inline]
11184#[target_feature(enable = "avx512f,avx512vl")]
11185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11186#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11187pub fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 {
11188 unsafe {
11189 let convert: __m128 = _mm256_cvtpd_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
11191 }
11192}
11193
11194/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11195///
11196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_ps&expand=1711)
11197#[inline]
11198#[target_feature(enable = "avx512f,avx512vl")]
11199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11200#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11201pub fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
11202 unsafe {
11203 let convert: __m128 = _mm256_cvtpd_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
11205 }
11206}
11207
11208/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11209///
11210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_ps&expand=1707)
11211#[inline]
11212#[target_feature(enable = "avx512f,avx512vl")]
11213#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11214#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11215pub fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
11216 unsafe {
11217 let convert: __m128 = _mm_cvtpd_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
11219 }
11220}
11221
11222/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11223///
11224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_ps&expand=1708)
11225#[inline]
11226#[target_feature(enable = "avx512f,avx512vl")]
11227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11228#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11229pub fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 {
11230 unsafe {
11231 let convert: __m128 = _mm_cvtpd_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
11233 }
11234}
11235
11236/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
11237///
11238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi32&expand=1675)
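///
/// # Examples
///
/// A minimal sketch (not from the original source); it assumes AVX-512F has
/// been detected at runtime and uses values that convert without rounding:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected above.
///         unsafe {
///             let a = _mm512_setr_pd(0.0, -1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0);
///             let r = _mm512_cvtpd_epi32(a);
///             let mut out = [0i32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [0, -1, 2, -3, 4, -5, 6, -7]);
///         }
///     }
/// }
/// ```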
11239#[inline]
11240#[target_feature(enable = "avx512f")]
11241#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11242#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11243pub fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i {
11244 unsafe {
        transmute(vcvtpd2dq(
            a.as_f64x8(),
            i32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
11251 }
11252}
11253
11254/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11255///
11256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi32&expand=1676)
11257#[inline]
11258#[target_feature(enable = "avx512f")]
11259#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11260#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11261pub fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11262 unsafe {
        transmute(vcvtpd2dq(
            a.as_f64x8(),
            src.as_i32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
11269 }
11270}
11271
11272/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11273///
11274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi32&expand=1677)
11275#[inline]
11276#[target_feature(enable = "avx512f")]
11277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11278#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11279pub fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
11280 unsafe {
        transmute(vcvtpd2dq(
            a.as_f64x8(),
            i32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
11287 }
11288}
11289
11290/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11291///
11292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi32&expand=1673)
11293#[inline]
11294#[target_feature(enable = "avx512f,avx512vl")]
11295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11296#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11297pub fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11298 unsafe {
11299 let convert: __m128i = _mm256_cvtpd_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
11301 }
11302}
11303
11304/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11305///
11306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi32&expand=1674)
11307#[inline]
11308#[target_feature(enable = "avx512f,avx512vl")]
11309#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11310#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11311pub fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
11312 unsafe {
11313 let convert: __m128i = _mm256_cvtpd_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
11315 }
11316}
11317
11318/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11319///
11320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi32&expand=1670)
11321#[inline]
11322#[target_feature(enable = "avx512f,avx512vl")]
11323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11324#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11325pub fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11326 unsafe {
11327 let convert: __m128i = _mm_cvtpd_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
11329 }
11330}
11331
11332/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11333///
11334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi32&expand=1671)
11335#[inline]
11336#[target_feature(enable = "avx512f,avx512vl")]
11337#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11338#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11339pub fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
11340 unsafe {
11341 let convert: __m128i = _mm_cvtpd_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
11343 }
11344}
11345
11346/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11347///
11348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu32&expand=1693)
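///
/// # Examples
///
/// A minimal sketch (not from the original source); it assumes AVX-512F has
/// been detected at runtime and uses non-negative values that are exactly
/// representable as unsigned 32-bit integers:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected above.
///         unsafe {
///             let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 4294967295.0);
///             let r = _mm512_cvtpd_epu32(a);
///             let mut out = [0u32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [0, 1, 2, 3, 4, 5, 6, u32::MAX]);
///         }
///     }
/// }
/// ```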
11349#[inline]
11350#[target_feature(enable = "avx512f")]
11351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11352#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11353pub fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i {
11354 unsafe {
        transmute(vcvtpd2udq(
            a.as_f64x8(),
            u32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
11361 }
11362}
11363
11364/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11365///
11366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu32&expand=1694)
11367#[inline]
11368#[target_feature(enable = "avx512f")]
11369#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11370#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11371pub fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11372 unsafe {
        transmute(vcvtpd2udq(
            a.as_f64x8(),
            src.as_u32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
11379 }
11380}
11381
11382/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11383///
11384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu32&expand=1695)
11385#[inline]
11386#[target_feature(enable = "avx512f")]
11387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11388#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11389pub fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
11390 unsafe {
        transmute(vcvtpd2udq(
            a.as_f64x8(),
            u32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
11397 }
11398}
11399
11400/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11401///
11402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu32&expand=1690)
11403#[inline]
11404#[target_feature(enable = "avx512f,avx512vl")]
11405#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11406#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11407pub fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i {
    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, 0b11111111)) }
11409}
11410
11411/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11412///
11413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu32&expand=1691)
11414#[inline]
11415#[target_feature(enable = "avx512f,avx512vl")]
11416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11417#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11418pub fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), k)) }
11420}
11421
11422/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11423///
11424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu32&expand=1692)
11425#[inline]
11426#[target_feature(enable = "avx512f,avx512vl")]
11427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11428#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11429pub fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, k)) }
11431}
11432
11433/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11434///
11435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu32&expand=1687)
11436#[inline]
11437#[target_feature(enable = "avx512f,avx512vl")]
11438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11439#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11440pub fn _mm_cvtpd_epu32(a: __m128d) -> __m128i {
    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, 0b11111111)) }
11442}
11443
11444/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11445///
11446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu32&expand=1688)
11447#[inline]
11448#[target_feature(enable = "avx512f,avx512vl")]
11449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11450#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11451pub fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), k)) }
11453}
11454
11455/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11456///
11457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu32&expand=1689)
11458#[inline]
11459#[target_feature(enable = "avx512f,avx512vl")]
11460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11461#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11462pub fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, k)) }
11464}
11465
11466/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11467///
11468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_pslo&expand=1715)
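///
/// # Examples
///
/// A minimal sketch (not part of the original source) showing that the eight
/// converted values land in the lower half of the result while the upper half
/// is zeroed; it assumes AVX-512F has been detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected above.
///         unsafe {
///             let v2 = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
///             let r = _mm512_cvtpd_pslo(v2);
///             let mut out = [0.0f32; 16];
///             _mm512_storeu_ps(out.as_mut_ptr(), r);
///             assert_eq!(
///                 out,
///                 [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
///             );
///         }
///     }
/// }
/// ```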
11469#[inline]
11470#[target_feature(enable = "avx512f")]
11471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11472#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11473pub fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
11474 unsafe {
        let r: f32x8 = vcvtpd2ps(
            v2.as_f64x8(),
            f32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        );
11481 simd_shuffle!(
11482 r,
11483 f32x8::ZERO,
11484 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11485 )
11486 }
11487}
11488
11489/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11490///
11491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_pslo&expand=1716)
11492#[inline]
11493#[target_feature(enable = "avx512f")]
11494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11495#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11496pub fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
11497 unsafe {
        let r: f32x8 = vcvtpd2ps(
            v2.as_f64x8(),
            _mm512_castps512_ps256(src).as_f32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
11504 simd_shuffle!(
11505 r,
11506 f32x8::ZERO,
11507 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11508 )
11509 }
11510}
11511
11512/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11513///
11514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi32&expand=1535)
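///
/// # Examples
///
/// A minimal sketch (not from the original source) demonstrating sign
/// extension; it assumes AVX-512F has been detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected above.
///         unsafe {
///             let a = _mm_setr_epi8(0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15);
///             let r = _mm512_cvtepi8_epi32(a);
///             let mut out = [0i32; 16];
///             _mm512_storeu_epi32(out.as_mut_ptr(), r);
///             // Each byte is sign extended, so negative values stay negative.
///             assert_eq!(out[1], -1);
///             assert_eq!(out[15], -15);
///         }
///     }
/// }
/// ```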
11515#[inline]
11516#[target_feature(enable = "avx512f")]
11517#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11518#[cfg_attr(test, assert_instr(vpmovsxbd))]
11519pub fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
11520 unsafe {
11521 let a: i8x16 = a.as_i8x16();
        transmute::<i32x16, _>(simd_cast(a))
11523 }
11524}
11525
11526/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11527///
11528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi32&expand=1536)
11529#[inline]
11530#[target_feature(enable = "avx512f")]
11531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11532#[cfg_attr(test, assert_instr(vpmovsxbd))]
11533pub fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11534 unsafe {
11535 let convert: i32x16 = _mm512_cvtepi8_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11537 }
11538}
11539
11540/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11541///
11542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi32&expand=1537)
11543#[inline]
11544#[target_feature(enable = "avx512f")]
11545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11546#[cfg_attr(test, assert_instr(vpmovsxbd))]
11547pub fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11548 unsafe {
11549 let convert: i32x16 = _mm512_cvtepi8_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11551 }
11552}
11553
11554/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11555///
11556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi32&expand=1533)
11557#[inline]
11558#[target_feature(enable = "avx512f,avx512vl")]
11559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11560#[cfg_attr(test, assert_instr(vpmovsxbd))]
11561pub fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11562 unsafe {
11563 let convert: i32x8 = _mm256_cvtepi8_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11565 }
11566}
11567
11568/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11569///
11570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi32&expand=1534)
11571#[inline]
11572#[target_feature(enable = "avx512f,avx512vl")]
11573#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11574#[cfg_attr(test, assert_instr(vpmovsxbd))]
11575pub fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11576 unsafe {
11577 let convert: i32x8 = _mm256_cvtepi8_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11579 }
11580}
11581
11582/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11583///
11584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi32&expand=1530)
11585#[inline]
11586#[target_feature(enable = "avx512f,avx512vl")]
11587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11588#[cfg_attr(test, assert_instr(vpmovsxbd))]
11589pub fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11590 unsafe {
11591 let convert: i32x4 = _mm_cvtepi8_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11593 }
11594}
11595
11596/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11597///
11598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi32&expand=1531)
11599#[inline]
11600#[target_feature(enable = "avx512f,avx512vl")]
11601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11602#[cfg_attr(test, assert_instr(vpmovsxbd))]
11603pub fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11604 unsafe {
11605 let convert: i32x4 = _mm_cvtepi8_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
11607 }
11608}
11609
11610/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11611///
11612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi64&expand=1544)
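///
/// # Examples
///
/// A minimal sketch (not from the original source) showing that only the low
/// 8 bytes of `a` participate; it assumes AVX-512F has been detected at
/// runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected above.
///         unsafe {
///             let a = _mm_setr_epi8(1, -2, 3, -4, 5, -6, 7, -8, 0, 0, 0, 0, 0, 0, 0, 0);
///             let r = _mm512_cvtepi8_epi64(a);
///             let mut out = [0i64; 8];
///             _mm512_storeu_epi64(out.as_mut_ptr(), r);
///             assert_eq!(out, [1, -2, 3, -4, 5, -6, 7, -8]);
///         }
///     }
/// }
/// ```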
11613#[inline]
11614#[target_feature(enable = "avx512f")]
11615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11616#[cfg_attr(test, assert_instr(vpmovsxbq))]
11617pub fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
11618 unsafe {
11619 let a: i8x16 = a.as_i8x16();
11620 let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i64x8, _>(simd_cast(v64))
11622 }
11623}
11624
11625/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11626///
11627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi64&expand=1545)
11628#[inline]
11629#[target_feature(enable = "avx512f")]
11630#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11631#[cfg_attr(test, assert_instr(vpmovsxbq))]
11632pub fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11633 unsafe {
11634 let convert: i64x8 = _mm512_cvtepi8_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
11636 }
11637}
11638
11639/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11640///
11641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi64&expand=1546)
11642#[inline]
11643#[target_feature(enable = "avx512f")]
11644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11645#[cfg_attr(test, assert_instr(vpmovsxbq))]
11646pub fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11647 unsafe {
11648 let convert: i64x8 = _mm512_cvtepi8_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
11650 }
11651}
11652
11653/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11654///
11655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi64&expand=1542)
11656#[inline]
11657#[target_feature(enable = "avx512f,avx512vl")]
11658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11659#[cfg_attr(test, assert_instr(vpmovsxbq))]
11660pub fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11661 unsafe {
11662 let convert: i64x4 = _mm256_cvtepi8_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
11664 }
11665}
11666
11667/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11668///
11669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi64&expand=1543)
11670#[inline]
11671#[target_feature(enable = "avx512f,avx512vl")]
11672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11673#[cfg_attr(test, assert_instr(vpmovsxbq))]
11674pub fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11675 unsafe {
11676 let convert: i64x4 = _mm256_cvtepi8_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
11678 }
11679}
11680
11681/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11682///
11683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi64&expand=1539)
11684#[inline]
11685#[target_feature(enable = "avx512f,avx512vl")]
11686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11687#[cfg_attr(test, assert_instr(vpmovsxbq))]
11688pub fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11689 unsafe {
11690 let convert: i64x2 = _mm_cvtepi8_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
11692 }
11693}
11694
11695/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11696///
11697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi64&expand=1540)
11698#[inline]
11699#[target_feature(enable = "avx512f,avx512vl")]
11700#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11701#[cfg_attr(test, assert_instr(vpmovsxbq))]
11702pub fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11703 unsafe {
11704 let convert: i64x2 = _mm_cvtepi8_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
11706 }
11707}
11708
11709/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11710///
11711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi32&expand=1621)
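///
/// # Examples
///
/// A minimal sketch (not from the original source) contrasting zero extension
/// with sign extension; it assumes AVX-512F has been detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected above.
///         unsafe {
///             // The last byte, 0xFF, is zero extended to 255, not sign extended to -1.
///             let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -1);
///             let r = _mm512_cvtepu8_epi32(a);
///             let mut out = [0i32; 16];
///             _mm512_storeu_epi32(out.as_mut_ptr(), r);
///             assert_eq!(out[14], 14);
///             assert_eq!(out[15], 255);
///         }
///     }
/// }
/// ```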
11712#[inline]
11713#[target_feature(enable = "avx512f")]
11714#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11715#[cfg_attr(test, assert_instr(vpmovzxbd))]
11716pub fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
11717 unsafe {
11718 let a: u8x16 = a.as_u8x16();
        transmute::<i32x16, _>(simd_cast(a))
11720 }
11721}
11722
11723/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11724///
11725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi32&expand=1622)
11726#[inline]
11727#[target_feature(enable = "avx512f")]
11728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11729#[cfg_attr(test, assert_instr(vpmovzxbd))]
11730pub fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11731 unsafe {
11732 let convert: i32x16 = _mm512_cvtepu8_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11734 }
11735}
11736
11737/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11738///
11739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi32&expand=1623)
11740#[inline]
11741#[target_feature(enable = "avx512f")]
11742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11743#[cfg_attr(test, assert_instr(vpmovzxbd))]
11744pub fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11745 unsafe {
11746 let convert: i32x16 = _mm512_cvtepu8_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11748 }
11749}
11750
11751/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11752///
11753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi32&expand=1619)
11754#[inline]
11755#[target_feature(enable = "avx512f,avx512vl")]
11756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11757#[cfg_attr(test, assert_instr(vpmovzxbd))]
11758pub fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11759 unsafe {
11760 let convert: i32x8 = _mm256_cvtepu8_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11762 }
11763}
11764
11765/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11766///
11767/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm256_maskz_cvtepu8_epi32&expand=1620)
11768#[inline]
11769#[target_feature(enable = "avx512f,avx512vl")]
11770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11771#[cfg_attr(test, assert_instr(vpmovzxbd))]
11772pub fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11773 unsafe {
11774 let convert: i32x8 = _mm256_cvtepu8_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11776 }
11777}
11778
11779/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11780///
11781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi32&expand=1616)
11782#[inline]
11783#[target_feature(enable = "avx512f,avx512vl")]
11784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11785#[cfg_attr(test, assert_instr(vpmovzxbd))]
11786pub fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11787 unsafe {
11788 let convert: i32x4 = _mm_cvtepu8_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11790 }
11791}
11792
/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11794///
11795/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm_maskz_cvtepu8_epi32&expand=1617)
11796#[inline]
11797#[target_feature(enable = "avx512f,avx512vl")]
11798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11799#[cfg_attr(test, assert_instr(vpmovzxbd))]
11800pub fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11801 unsafe {
11802 let convert: i32x4 = _mm_cvtepu8_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
11804 }
11805}
11806
/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11808///
11809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi64&expand=1630)
11810#[inline]
11811#[target_feature(enable = "avx512f")]
11812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11813#[cfg_attr(test, assert_instr(vpmovzxbq))]
11814pub fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
11815 unsafe {
11816 let a: u8x16 = a.as_u8x16();
11817 let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i64x8, _>(simd_cast(v64))
11819 }
11820}
11821
11822/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11823///
11824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi64&expand=1631)
11825#[inline]
11826#[target_feature(enable = "avx512f")]
11827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11828#[cfg_attr(test, assert_instr(vpmovzxbq))]
11829pub fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11830 unsafe {
11831 let convert: i64x8 = _mm512_cvtepu8_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
11833 }
11834}
11835
11836/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11837///
11838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi64&expand=1632)
11839#[inline]
11840#[target_feature(enable = "avx512f")]
11841#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11842#[cfg_attr(test, assert_instr(vpmovzxbq))]
11843pub fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11844 unsafe {
11845 let convert: i64x8 = _mm512_cvtepu8_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
11847 }
11848}
11849
11850/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11851///
11852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi64&expand=1628)
11853#[inline]
11854#[target_feature(enable = "avx512f,avx512vl")]
11855#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11856#[cfg_attr(test, assert_instr(vpmovzxbq))]
11857pub fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11858 unsafe {
11859 let convert: i64x4 = _mm256_cvtepu8_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
11861 }
11862}
11863
11864/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11865///
11866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi64&expand=1629)
11867#[inline]
11868#[target_feature(enable = "avx512f,avx512vl")]
11869#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11870#[cfg_attr(test, assert_instr(vpmovzxbq))]
11871pub fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11872 unsafe {
11873 let convert: i64x4 = _mm256_cvtepu8_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
11875 }
11876}
11877
11878/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11879///
11880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi64&expand=1625)
11881#[inline]
11882#[target_feature(enable = "avx512f,avx512vl")]
11883#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11884#[cfg_attr(test, assert_instr(vpmovzxbq))]
11885pub fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11886 unsafe {
11887 let convert: i64x2 = _mm_cvtepu8_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
11889 }
11890}
11891
11892/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11893///
11894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi64&expand=1626)
11895#[inline]
11896#[target_feature(enable = "avx512f,avx512vl")]
11897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11898#[cfg_attr(test, assert_instr(vpmovzxbq))]
11899pub fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11900 unsafe {
11901 let convert: i64x2 = _mm_cvtepu8_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
11903 }
11904}
11905
11906/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst.
11907///
11908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi32&expand=1389)
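///
/// # Examples
///
/// A minimal sketch (not from the original source); it assumes AVX-512F has
/// been detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected above.
///         unsafe {
///             let a = _mm256_setr_epi16(0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15);
///             let r = _mm512_cvtepi16_epi32(a);
///             let mut out = [0i32; 16];
///             _mm512_storeu_epi32(out.as_mut_ptr(), r);
///             // Each 16-bit lane is sign extended to a 32-bit lane.
///             assert_eq!(out[1], -1);
///             assert_eq!(out[15], -15);
///         }
///     }
/// }
/// ```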
11909#[inline]
11910#[target_feature(enable = "avx512f")]
11911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11912#[cfg_attr(test, assert_instr(vpmovsxwd))]
11913pub fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
11914 unsafe {
11915 let a: i16x16 = a.as_i16x16();
        transmute::<i32x16, _>(simd_cast(a))
11917 }
11918}
11919
11920/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11921///
11922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi32&expand=1390)
11923#[inline]
11924#[target_feature(enable = "avx512f")]
11925#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11926#[cfg_attr(test, assert_instr(vpmovsxwd))]
11927pub fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
11928 unsafe {
11929 let convert: i32x16 = _mm512_cvtepi16_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11931 }
11932}
11933
11934/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11935///
11936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi32&expand=1391)
11937#[inline]
11938#[target_feature(enable = "avx512f")]
11939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11940#[cfg_attr(test, assert_instr(vpmovsxwd))]
11941pub fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
11942 unsafe {
11943 let convert: i32x16 = _mm512_cvtepi16_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11945 }
11946}
11947
11948/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11949///
11950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi32&expand=1387)
11951#[inline]
11952#[target_feature(enable = "avx512f,avx512vl")]
11953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11954#[cfg_attr(test, assert_instr(vpmovsxwd))]
11955pub fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11956 unsafe {
11957 let convert: i32x8 = _mm256_cvtepi16_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11959 }
11960}
11961
11962/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11963///
11964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi32&expand=1388)
11965#[inline]
11966#[target_feature(enable = "avx512f,avx512vl")]
11967#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11968#[cfg_attr(test, assert_instr(vpmovsxwd))]
11969pub fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
11970 unsafe {
11971 let convert: i32x8 = _mm256_cvtepi16_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11973 }
11974}
11975
11976/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11977///
11978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi32&expand=1384)
11979#[inline]
11980#[target_feature(enable = "avx512f,avx512vl")]
11981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11982#[cfg_attr(test, assert_instr(vpmovsxwd))]
11983pub fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11984 unsafe {
11985 let convert: i32x4 = _mm_cvtepi16_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11987 }
11988}
11989
11990/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11991///
11992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi32&expand=1385)
11993#[inline]
11994#[target_feature(enable = "avx512f,avx512vl")]
11995#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11996#[cfg_attr(test, assert_instr(vpmovsxwd))]
11997pub fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
11998 unsafe {
11999 let convert: i32x4 = _mm_cvtepi16_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12001 }
12002}
12003
12004/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12005///
12006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi64&expand=1398)
12007#[inline]
12008#[target_feature(enable = "avx512f")]
12009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12010#[cfg_attr(test, assert_instr(vpmovsxwq))]
12011pub fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
12012 unsafe {
12013 let a: i16x8 = a.as_i16x8();
        transmute::<i64x8, _>(simd_cast(a))
12015 }
12016}
12017
12018/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12019///
12020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi64&expand=1399)
12021#[inline]
12022#[target_feature(enable = "avx512f")]
12023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12024#[cfg_attr(test, assert_instr(vpmovsxwq))]
12025pub fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12026 unsafe {
12027 let convert: i64x8 = _mm512_cvtepi16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12029 }
12030}
12031
12032/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12033///
12034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi64&expand=1400)
12035#[inline]
12036#[target_feature(enable = "avx512f")]
12037#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12038#[cfg_attr(test, assert_instr(vpmovsxwq))]
12039pub fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12040 unsafe {
12041 let convert: i64x8 = _mm512_cvtepi16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12043 }
12044}
12045
12046/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12047///
12048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi64&expand=1396)
12049#[inline]
12050#[target_feature(enable = "avx512f,avx512vl")]
12051#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12052#[cfg_attr(test, assert_instr(vpmovsxwq))]
12053pub fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12054 unsafe {
12055 let convert: i64x4 = _mm256_cvtepi16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12057 }
12058}
12059
12060/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12061///
12062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi64&expand=1397)
12063#[inline]
12064#[target_feature(enable = "avx512f,avx512vl")]
12065#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12066#[cfg_attr(test, assert_instr(vpmovsxwq))]
12067pub fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12068 unsafe {
12069 let convert: i64x4 = _mm256_cvtepi16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12071 }
12072}
12073
12074/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12075///
12076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi64&expand=1393)
12077#[inline]
12078#[target_feature(enable = "avx512f,avx512vl")]
12079#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12080#[cfg_attr(test, assert_instr(vpmovsxwq))]
12081pub fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12082 unsafe {
12083 let convert: i64x2 = _mm_cvtepi16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12085 }
12086}
12087
12088/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12089///
12090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi64&expand=1394)
12091#[inline]
12092#[target_feature(enable = "avx512f,avx512vl")]
12093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12094#[cfg_attr(test, assert_instr(vpmovsxwq))]
12095pub fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12096 unsafe {
12097 let convert: i64x2 = _mm_cvtepi16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12099 }
12100}
12101
12102/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst.
12103///
12104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi32&expand=1553)
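///
/// # Examples
///
/// A minimal sketch (not from the original source) highlighting that the
/// extension is unsigned; it assumes AVX-512F has been detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected above.
///         unsafe {
///             // The last lane, 0xFFFF, is zero extended to 65535 rather than -1.
///             let a = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -1);
///             let r = _mm512_cvtepu16_epi32(a);
///             let mut out = [0i32; 16];
///             _mm512_storeu_epi32(out.as_mut_ptr(), r);
///             assert_eq!(out[14], 14);
///             assert_eq!(out[15], 65535);
///         }
///     }
/// }
/// ```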
12105#[inline]
12106#[target_feature(enable = "avx512f")]
12107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12108#[cfg_attr(test, assert_instr(vpmovzxwd))]
12109pub fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
12110 unsafe {
12111 let a: u16x16 = a.as_u16x16();
        transmute::<i32x16, _>(simd_cast(a))
12113 }
12114}
12115
12116/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12117///
12118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi32&expand=1554)
12119#[inline]
12120#[target_feature(enable = "avx512f")]
12121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12122#[cfg_attr(test, assert_instr(vpmovzxwd))]
12123pub fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
12124 unsafe {
12125 let convert: i32x16 = _mm512_cvtepu16_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
12127 }
12128}
12129
12130/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12131///
12132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi32&expand=1555)
12133#[inline]
12134#[target_feature(enable = "avx512f")]
12135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12136#[cfg_attr(test, assert_instr(vpmovzxwd))]
12137pub fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
12138 unsafe {
12139 let convert: i32x16 = _mm512_cvtepu16_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
12141 }
12142}
12143
12144/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12145///
12146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi32&expand=1551)
12147#[inline]
12148#[target_feature(enable = "avx512f,avx512vl")]
12149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12150#[cfg_attr(test, assert_instr(vpmovzxwd))]
12151pub fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12152 unsafe {
12153 let convert: i32x8 = _mm256_cvtepu16_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
12155 }
12156}
12157
12158/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12159///
12160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi32&expand=1552)
12161#[inline]
12162#[target_feature(enable = "avx512f,avx512vl")]
12163#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12164#[cfg_attr(test, assert_instr(vpmovzxwd))]
12165pub fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
12166 unsafe {
12167 let convert: i32x8 = _mm256_cvtepu16_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
12169 }
12170}
12171
12172/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12173///
12174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi32&expand=1548)
12175#[inline]
12176#[target_feature(enable = "avx512f,avx512vl")]
12177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12178#[cfg_attr(test, assert_instr(vpmovzxwd))]
12179pub fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12180 unsafe {
12181 let convert: i32x4 = _mm_cvtepu16_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
12183 }
12184}
12185
12186/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12187///
12188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi32&expand=1549)
12189#[inline]
12190#[target_feature(enable = "avx512f,avx512vl")]
12191#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12192#[cfg_attr(test, assert_instr(vpmovzxwd))]
12193pub fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
12194 unsafe {
12195 let convert: i32x4 = _mm_cvtepu16_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12197 }
12198}
12199
12200/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12201///
12202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi64&expand=1562)
12203#[inline]
12204#[target_feature(enable = "avx512f")]
12205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12206#[cfg_attr(test, assert_instr(vpmovzxwq))]
12207pub fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
12208 unsafe {
12209 let a: u16x8 = a.as_u16x8();
        transmute::<i64x8, _>(simd_cast(a))
12211 }
12212}
12213
12214/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12215///
12216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi64&expand=1563)
12217#[inline]
12218#[target_feature(enable = "avx512f")]
12219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12220#[cfg_attr(test, assert_instr(vpmovzxwq))]
12221pub fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12222 unsafe {
12223 let convert: i64x8 = _mm512_cvtepu16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12225 }
12226}
12227
12228/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12229///
12230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi64&expand=1564)
12231#[inline]
12232#[target_feature(enable = "avx512f")]
12233#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12234#[cfg_attr(test, assert_instr(vpmovzxwq))]
12235pub fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12236 unsafe {
12237 let convert: i64x8 = _mm512_cvtepu16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12239 }
12240}
12241
12242/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12243///
12244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi64&expand=1560)
12245#[inline]
12246#[target_feature(enable = "avx512f,avx512vl")]
12247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12248#[cfg_attr(test, assert_instr(vpmovzxwq))]
12249pub fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12250 unsafe {
12251 let convert: i64x4 = _mm256_cvtepu16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12253 }
12254}
12255
12256/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12257///
12258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi64&expand=1561)
12259#[inline]
12260#[target_feature(enable = "avx512f,avx512vl")]
12261#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12262#[cfg_attr(test, assert_instr(vpmovzxwq))]
12263pub fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12264 unsafe {
12265 let convert: i64x4 = _mm256_cvtepu16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12267 }
12268}
12269
12270/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12271///
12272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi64&expand=1557)
12273#[inline]
12274#[target_feature(enable = "avx512f,avx512vl")]
12275#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12276#[cfg_attr(test, assert_instr(vpmovzxwq))]
12277pub fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12278 unsafe {
12279 let convert: i64x2 = _mm_cvtepu16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12281 }
12282}
12283
12284/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12285///
12286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi64&expand=1558)
12287#[inline]
12288#[target_feature(enable = "avx512f,avx512vl")]
12289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12290#[cfg_attr(test, assert_instr(vpmovzxwq))]
12291pub fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12292 unsafe {
12293 let convert: i64x2 = _mm_cvtepu16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12295 }
12296}
12297
12298/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12299///
12300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi64&expand=1428)
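///
/// # Examples
///
/// A minimal sketch showing that sign extension preserves negative values
/// (illustrative values; AVX-512F is assumed to be available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: the required CPU features were just detected.
///         unsafe {
///             let a = _mm256_setr_epi32(-1, 2, -3, 4, -5, 6, -7, 8);
///             let _r = _mm512_cvtepi32_epi64(a);
///             // The 64-bit lanes of the result are -1, 2, -3, 4, -5, 6, -7, 8.
///         }
///     }
/// }
/// ```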
12301#[inline]
12302#[target_feature(enable = "avx512f")]
12303#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12304#[cfg_attr(test, assert_instr(vpmovsxdq))]
12305pub fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
12306 unsafe {
12307 let a: i32x8 = a.as_i32x8();
        transmute::<i64x8, _>(simd_cast(a))
12309 }
12310}
12311
12312/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12313///
12314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi64&expand=1429)
12315#[inline]
12316#[target_feature(enable = "avx512f")]
12317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12318#[cfg_attr(test, assert_instr(vpmovsxdq))]
12319pub fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12320 unsafe {
12321 let convert: i64x8 = _mm512_cvtepi32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12323 }
12324}
12325
12326/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12327///
12328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi64&expand=1430)
12329#[inline]
12330#[target_feature(enable = "avx512f")]
12331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12332#[cfg_attr(test, assert_instr(vpmovsxdq))]
12333pub fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12334 unsafe {
12335 let convert: i64x8 = _mm512_cvtepi32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12337 }
12338}
12339
12340/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12341///
12342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi64&expand=1426)
12343#[inline]
12344#[target_feature(enable = "avx512f,avx512vl")]
12345#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12346#[cfg_attr(test, assert_instr(vpmovsxdq))]
12347pub fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12348 unsafe {
12349 let convert: i64x4 = _mm256_cvtepi32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12351 }
12352}
12353
12354/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12355///
12356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi64&expand=1427)
12357#[inline]
12358#[target_feature(enable = "avx512f,avx512vl")]
12359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12360#[cfg_attr(test, assert_instr(vpmovsxdq))]
12361pub fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12362 unsafe {
12363 let convert: i64x4 = _mm256_cvtepi32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12365 }
12366}
12367
12368/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12369///
12370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi64&expand=1423)
12371#[inline]
12372#[target_feature(enable = "avx512f,avx512vl")]
12373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12374#[cfg_attr(test, assert_instr(vpmovsxdq))]
12375pub fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12376 unsafe {
12377 let convert: i64x2 = _mm_cvtepi32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12379 }
12380}
12381
12382/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12383///
12384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi64&expand=1424)
12385#[inline]
12386#[target_feature(enable = "avx512f,avx512vl")]
12387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12388#[cfg_attr(test, assert_instr(vpmovsxdq))]
12389pub fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12390 unsafe {
12391 let convert: i64x2 = _mm_cvtepi32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12393 }
12394}
12395
12396/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12397///
12398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_epi64&expand=1571)
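///
/// # Examples
///
/// A minimal sketch showing that the input lanes are treated as unsigned
/// (illustrative values; AVX-512F is assumed to be available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: the required CPU features were just detected.
///         unsafe {
///             let a = _mm256_set1_epi32(-1); // every 32-bit lane is 0xFFFF_FFFF
///             let _r = _mm512_cvtepu32_epi64(a);
///             // Zero extension yields 4294967295 in every 64-bit lane, not -1.
///         }
///     }
/// }
/// ```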
12399#[inline]
12400#[target_feature(enable = "avx512f")]
12401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12402#[cfg_attr(test, assert_instr(vpmovzxdq))]
12403pub fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
12404 unsafe {
12405 let a: u32x8 = a.as_u32x8();
        transmute::<i64x8, _>(simd_cast(a))
12407 }
12408}
12409
12410/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12411///
12412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_epi64&expand=1572)
12413#[inline]
12414#[target_feature(enable = "avx512f")]
12415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12416#[cfg_attr(test, assert_instr(vpmovzxdq))]
12417pub fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12418 unsafe {
12419 let convert: i64x8 = _mm512_cvtepu32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12421 }
12422}
12423
12424/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12425///
12426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_epi64&expand=1573)
12427#[inline]
12428#[target_feature(enable = "avx512f")]
12429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12430#[cfg_attr(test, assert_instr(vpmovzxdq))]
12431pub fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12432 unsafe {
12433 let convert: i64x8 = _mm512_cvtepu32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12435 }
12436}
12437
12438/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12439///
12440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_epi64&expand=1569)
12441#[inline]
12442#[target_feature(enable = "avx512f,avx512vl")]
12443#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12444#[cfg_attr(test, assert_instr(vpmovzxdq))]
12445pub fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12446 unsafe {
12447 let convert: i64x4 = _mm256_cvtepu32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12449 }
12450}
12451
12452/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12453///
12454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_epi64&expand=1570)
12455#[inline]
12456#[target_feature(enable = "avx512f,avx512vl")]
12457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12458#[cfg_attr(test, assert_instr(vpmovzxdq))]
12459pub fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12460 unsafe {
12461 let convert: i64x4 = _mm256_cvtepu32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12463 }
12464}
12465
12466/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12467///
12468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_epi64&expand=1566)
12469#[inline]
12470#[target_feature(enable = "avx512f,avx512vl")]
12471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12472#[cfg_attr(test, assert_instr(vpmovzxdq))]
12473pub fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12474 unsafe {
12475 let convert: i64x2 = _mm_cvtepu32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12477 }
12478}
12479
12480/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12481///
12482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_epi64&expand=1567)
12483#[inline]
12484#[target_feature(enable = "avx512f,avx512vl")]
12485#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12486#[cfg_attr(test, assert_instr(vpmovzxdq))]
12487pub fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12488 unsafe {
12489 let convert: i64x2 = _mm_cvtepu32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12491 }
12492}
12493
12494/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
12495///
12496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_ps&expand=1455)
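///
/// # Examples
///
/// A brief illustrative sketch (values chosen for demonstration; AVX-512F is
/// assumed to have been detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: the required CPU features were just detected.
///         unsafe {
///             let a = _mm512_set1_epi32(-3);
///             let _r = _mm512_cvtepi32_ps(a);
///             // Every f32 lane of the result is -3.0.
///         }
///     }
/// }
/// ```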
12497#[inline]
12498#[target_feature(enable = "avx512f")]
12499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12500#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12501pub fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
12502 unsafe {
12503 let a: i32x16 = a.as_i32x16();
        transmute::<f32x16, _>(simd_cast(a))
12505 }
12506}
12507
12508/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12509///
12510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_ps&expand=1456)
12511#[inline]
12512#[target_feature(enable = "avx512f")]
12513#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12514#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12515pub fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
12516 unsafe {
12517 let convert: f32x16 = _mm512_cvtepi32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
12519 }
12520}
12521
12522/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12523///
12524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_ps&expand=1457)
12525#[inline]
12526#[target_feature(enable = "avx512f")]
12527#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12528#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12529pub fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
12530 unsafe {
12531 let convert: f32x16 = _mm512_cvtepi32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
12533 }
12534}
12535
12536/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12537///
12538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_ps&expand=1453)
12539#[inline]
12540#[target_feature(enable = "avx512f,avx512vl")]
12541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12542#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12543pub fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
12544 unsafe {
12545 let convert: f32x8 = _mm256_cvtepi32_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, convert, src.as_f32x8()))
12547 }
12548}
12549
12550/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12551///
12552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_ps&expand=1454)
12553#[inline]
12554#[target_feature(enable = "avx512f,avx512vl")]
12555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12556#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12557pub fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
12558 unsafe {
12559 let convert: f32x8 = _mm256_cvtepi32_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, convert, f32x8::ZERO))
12561 }
12562}
12563
12564/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12565///
12566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_ps&expand=1450)
12567#[inline]
12568#[target_feature(enable = "avx512f,avx512vl")]
12569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12570#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12571pub fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
12572 unsafe {
12573 let convert: f32x4 = _mm_cvtepi32_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, convert, src.as_f32x4()))
12575 }
12576}
12577
12578/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12579///
12580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_ps&expand=1451)
12581#[inline]
12582#[target_feature(enable = "avx512f,avx512vl")]
12583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12584#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12585pub fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
12586 unsafe {
12587 let convert: f32x4 = _mm_cvtepi32_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, convert, f32x4::ZERO))
12589 }
12590}
12591
12592/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12593///
12594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_pd&expand=1446)
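///
/// # Examples
///
/// An illustrative sketch of the widening conversion (assumes AVX-512F was
/// detected at runtime; values are arbitrary):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: the required CPU features were just detected.
///         unsafe {
///             let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
///             let _r = _mm512_cvtepi32_pd(a);
///             // The eight i32 values become the f64 lanes 0.0, 1.0, ..., 7.0.
///         }
///     }
/// }
/// ```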
12595#[inline]
12596#[target_feature(enable = "avx512f")]
12597#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12598#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12599pub fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
12600 unsafe {
12601 let a: i32x8 = a.as_i32x8();
        transmute::<f64x8, _>(simd_cast(a))
12603 }
12604}
12605
12606/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12607///
12608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_pd&expand=1447)
12609#[inline]
12610#[target_feature(enable = "avx512f")]
12611#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12612#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12613pub fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
12614 unsafe {
12615 let convert: f64x8 = _mm512_cvtepi32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12617 }
12618}
12619
12620/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12621///
12622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_pd&expand=1448)
12623#[inline]
12624#[target_feature(enable = "avx512f")]
12625#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12626#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12627pub fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
12628 unsafe {
12629 let convert: f64x8 = _mm512_cvtepi32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
12631 }
12632}
12633
12634/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12635///
12636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_pd&expand=1444)
12637#[inline]
12638#[target_feature(enable = "avx512f,avx512vl")]
12639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12640#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12641pub fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
12642 unsafe {
12643 let convert: f64x4 = _mm256_cvtepi32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
12645 }
12646}
12647
12648/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12649///
12650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_pd&expand=1445)
12651#[inline]
12652#[target_feature(enable = "avx512f,avx512vl")]
12653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12654#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12655pub fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
12656 unsafe {
12657 let convert: f64x4 = _mm256_cvtepi32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
12659 }
12660}
12661
12662/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12663///
12664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_pd&expand=1441)
12665#[inline]
12666#[target_feature(enable = "avx512f,avx512vl")]
12667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12668#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12669pub fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
12670 unsafe {
12671 let convert: f64x2 = _mm_cvtepi32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
12673 }
12674}
12675
12676/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12677///
12678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_pd&expand=1442)
12679#[inline]
12680#[target_feature(enable = "avx512f,avx512vl")]
12681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12682#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12683pub fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
12684 unsafe {
12685 let convert: f64x2 = _mm_cvtepi32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
12687 }
12688}
12689
12690/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
12691///
12692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_ps&expand=1583)
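///
/// # Examples
///
/// A sketch highlighting the unsigned interpretation and f32 rounding
/// (illustrative values; AVX-512F is assumed to be available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: the required CPU features were just detected.
///         unsafe {
///             let a = _mm512_set1_epi32(-1); // 0xFFFF_FFFF in every lane
///             let _r = _mm512_cvtepu32_ps(a);
///             // Interpreted as unsigned (4294967295), each lane rounds to the
///             // nearest representable f32, 4294967296.0.
///         }
///     }
/// }
/// ```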
12693#[inline]
12694#[target_feature(enable = "avx512f")]
12695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12696#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12697pub fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
12698 unsafe {
12699 let a: u32x16 = a.as_u32x16();
        transmute::<f32x16, _>(simd_cast(a))
12701 }
12702}
12703
12704/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12705///
12706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_ps&expand=1584)
12707#[inline]
12708#[target_feature(enable = "avx512f")]
12709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12710#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12711pub fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
12712 unsafe {
12713 let convert: f32x16 = _mm512_cvtepu32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
12715 }
12716}
12717
12718/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12719///
12720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_ps&expand=1585)
12721#[inline]
12722#[target_feature(enable = "avx512f")]
12723#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12724#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12725pub fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
12726 unsafe {
12727 let convert: f32x16 = _mm512_cvtepu32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
12729 }
12730}
12731
12732/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12733///
12734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_pd&expand=1580)
12735#[inline]
12736#[target_feature(enable = "avx512f")]
12737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12738#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12739pub fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
12740 unsafe {
12741 let a: u32x8 = a.as_u32x8();
        transmute::<f64x8, _>(simd_cast(a))
12743 }
12744}
12745
12746/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12747///
12748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_pd&expand=1581)
12749#[inline]
12750#[target_feature(enable = "avx512f")]
12751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12752#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12753pub fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
12754 unsafe {
12755 let convert: f64x8 = _mm512_cvtepu32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12757 }
12758}
12759
12760/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12761///
12762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_pd&expand=1582)
12763#[inline]
12764#[target_feature(enable = "avx512f")]
12765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12766#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12767pub fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
12768 unsafe {
12769 let convert: f64x8 = _mm512_cvtepu32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
12771 }
12772}
12773
12774/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12775///
12776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_pd&expand=1577)
12777#[inline]
12778#[target_feature(enable = "avx512f,avx512vl")]
12779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12780#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12781pub fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
12782 unsafe {
12783 let a: u32x4 = a.as_u32x4();
        transmute::<f64x4, _>(simd_cast(a))
12785 }
12786}
12787
12788/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12789///
12790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_pd&expand=1578)
12791#[inline]
12792#[target_feature(enable = "avx512f,avx512vl")]
12793#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12794#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12795pub fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
12796 unsafe {
12797 let convert: f64x4 = _mm256_cvtepu32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
12799 }
12800}
12801
12802/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12803///
12804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_pd&expand=1579)
12805#[inline]
12806#[target_feature(enable = "avx512f,avx512vl")]
12807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12808#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12809pub fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
12810 unsafe {
12811 let convert: f64x4 = _mm256_cvtepu32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
12813 }
12814}
12815
12816/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12817///
12818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_pd&expand=1574)
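///
/// # Examples
///
/// A sketch showing that only the two lowest lanes participate (illustrative
/// values; AVX-512F and AVX-512VL are assumed to be available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///         // SAFETY: the required CPU features were just detected.
///         unsafe {
///             let a = _mm_setr_epi32(1, -1, 100, 200);
///             let _r = _mm_cvtepu32_pd(a);
///             // Only the two lowest lanes are converted, as unsigned values:
///             // the result is [1.0, 4294967295.0]; the upper lanes of `a` are ignored.
///         }
///     }
/// }
/// ```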
12819#[inline]
12820#[target_feature(enable = "avx512f,avx512vl")]
12821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12822#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12823pub fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
12824 unsafe {
12825 let a: u32x4 = a.as_u32x4();
12826 let u64: u32x2 = simd_shuffle!(a, a, [0, 1]);
        transmute::<f64x2, _>(simd_cast(u64))
12828 }
12829}
12830
12831/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12832///
12833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_pd&expand=1575)
12834#[inline]
12835#[target_feature(enable = "avx512f,avx512vl")]
12836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12837#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12838pub fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
12839 unsafe {
12840 let convert: f64x2 = _mm_cvtepu32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
12842 }
12843}
12844
12845/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12846///
12847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_pd&expand=1576)
12848#[inline]
12849#[target_feature(enable = "avx512f,avx512vl")]
12850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12851#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12852pub fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
12853 unsafe {
12854 let convert: f64x2 = _mm_cvtepu32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
12856 }
12857}
12858
12859/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
12860///
12861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32lo_pd&expand=1464)
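///
/// # Examples
///
/// A sketch showing that only the lower half of the input is converted
/// (illustrative values; AVX-512F is assumed to have been detected):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: the required CPU features were just detected.
///         unsafe {
///             let v2 = _mm512_set1_epi32(9);
///             let _r = _mm512_cvtepi32lo_pd(v2);
///             // Only the lower eight 32-bit lanes of `v2` are used; every f64 lane is 9.0.
///         }
///     }
/// }
/// ```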
12862#[inline]
12863#[target_feature(enable = "avx512f")]
12864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12865#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12866pub fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
12867 unsafe {
12868 let v2: i32x16 = v2.as_i32x16();
12869 let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<f64x8, _>(simd_cast(v256))
12871 }
12872}
12873
12874/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12875///
12876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32lo_pd&expand=1465)
12877#[inline]
12878#[target_feature(enable = "avx512f")]
12879#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12880#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12881pub fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
12882 unsafe {
12883 let convert: f64x8 = _mm512_cvtepi32lo_pd(v2).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12885 }
12886}
12887
12888/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
12889///
12890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32lo_pd&expand=1586)
12891#[inline]
12892#[target_feature(enable = "avx512f")]
12893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12894#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12895pub fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
12896 unsafe {
12897 let v2: u32x16 = v2.as_u32x16();
12898 let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<f64x8, _>(simd_cast(v256))
12900 }
12901}
12902
/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12904///
12905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32lo_pd&expand=1587)
12906#[inline]
12907#[target_feature(enable = "avx512f")]
12908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12909#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12910pub fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
12911 unsafe {
12912 let convert: f64x8 = _mm512_cvtepu32lo_pd(v2).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12914 }
12915}
12916
12917/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12918///
12919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi16&expand=1419)
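///
/// # Examples
///
/// A sketch of the truncating narrowing (illustrative values; AVX-512F is
/// assumed to be available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: the required CPU features were just detected.
///         unsafe {
///             let a = _mm512_set1_epi32(0x0001_FFFF);
///             let _r = _mm512_cvtepi32_epi16(a);
///             // Truncation keeps only the low 16 bits: every 16-bit lane is 0xFFFF (-1 as i16).
///         }
///     }
/// }
/// ```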
12920#[inline]
12921#[target_feature(enable = "avx512f")]
12922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12923#[cfg_attr(test, assert_instr(vpmovdw))]
12924pub fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
12925 unsafe {
12926 let a: i32x16 = a.as_i32x16();
        transmute::<i16x16, _>(simd_cast(a))
12928 }
12929}
12930
12931/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12932///
12933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi16&expand=1420)
12934#[inline]
12935#[target_feature(enable = "avx512f")]
12936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12937#[cfg_attr(test, assert_instr(vpmovdw))]
12938pub fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
12939 unsafe {
12940 let convert: i16x16 = _mm512_cvtepi32_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
12942 }
12943}
12944
12945/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12946///
12947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi16&expand=1421)
12948#[inline]
12949#[target_feature(enable = "avx512f")]
12950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12951#[cfg_attr(test, assert_instr(vpmovdw))]
12952pub fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
12953 unsafe {
12954 let convert: i16x16 = _mm512_cvtepi32_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
12956 }
12957}
12958
12959/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12960///
12961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi16&expand=1416)
12962#[inline]
12963#[target_feature(enable = "avx512f,avx512vl")]
12964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12965#[cfg_attr(test, assert_instr(vpmovdw))]
12966pub fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
12967 unsafe {
12968 let a: i32x8 = a.as_i32x8();
        transmute::<i16x8, _>(simd_cast(a))
12970 }
12971}
12972
12973/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12974///
12975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi16&expand=1417)
12976#[inline]
12977#[target_feature(enable = "avx512f,avx512vl")]
12978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12979#[cfg_attr(test, assert_instr(vpmovdw))]
12980pub fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
12981 unsafe {
12982 let convert: i16x8 = _mm256_cvtepi32_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
12984 }
12985}
12986
12987/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12988///
12989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi16&expand=1418)
12990#[inline]
12991#[target_feature(enable = "avx512f,avx512vl")]
12992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12993#[cfg_attr(test, assert_instr(vpmovdw))]
12994pub fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
12995 unsafe {
12996 let convert: i16x8 = _mm256_cvtepi32_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
12998 }
12999}
13000
13001/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13002///
13003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi16&expand=1413)
13004#[inline]
13005#[target_feature(enable = "avx512f,avx512vl")]
13006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13007#[cfg_attr(test, assert_instr(vpmovdw))]
13008pub fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
13010}
13011
13012/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13013///
13014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi16&expand=1414)
13015#[inline]
13016#[target_feature(enable = "avx512f,avx512vl")]
13017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13018#[cfg_attr(test, assert_instr(vpmovdw))]
13019pub fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdw128(a.as_i32x4(), src.as_i16x8(), k)) }
13021}
13022
13023/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13024///
13025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi16&expand=1415)
13026#[inline]
13027#[target_feature(enable = "avx512f,avx512vl")]
13028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13029#[cfg_attr(test, assert_instr(vpmovdw))]
13030pub fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, k)) }
13032}
13033
13034/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13035///
13036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi8&expand=1437)
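///
/// # Examples
///
/// A sketch of the truncation to 8-bit lanes (illustrative values; AVX-512F is
/// assumed to have been detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: the required CPU features were just detected.
///         unsafe {
///             let a = _mm512_set1_epi32(0x0102_0304);
///             let _r = _mm512_cvtepi32_epi8(a);
///             // Truncation keeps only the low 8 bits: every 8-bit lane is 0x04.
///         }
///     }
/// }
/// ```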
13037#[inline]
13038#[target_feature(enable = "avx512f")]
13039#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13040#[cfg_attr(test, assert_instr(vpmovdb))]
13041pub fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
13042 unsafe {
13043 let a: i32x16 = a.as_i32x16();
        transmute::<i8x16, _>(simd_cast(a))
13045 }
13046}
13047
13048/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13049///
13050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi8&expand=1438)
13051#[inline]
13052#[target_feature(enable = "avx512f")]
13053#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13054#[cfg_attr(test, assert_instr(vpmovdb))]
13055pub fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13056 unsafe {
13057 let convert: i8x16 = _mm512_cvtepi32_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
13059 }
13060}
13061
13062/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13063///
13064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi8&expand=1439)
13065#[inline]
13066#[target_feature(enable = "avx512f")]
13067#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13068#[cfg_attr(test, assert_instr(vpmovdb))]
13069pub fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13070 unsafe {
13071 let convert: i8x16 = _mm512_cvtepi32_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
13073 }
13074}
13075
13076/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13077///
13078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi8&expand=1434)
13079#[inline]
13080#[target_feature(enable = "avx512f,avx512vl")]
13081#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13082#[cfg_attr(test, assert_instr(vpmovdb))]
13083pub fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
13085}
13086
13087/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13088///
13089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi8&expand=1435)
13090#[inline]
13091#[target_feature(enable = "avx512f,avx512vl")]
13092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13093#[cfg_attr(test, assert_instr(vpmovdb))]
13094pub fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovdb256(a.as_i32x8(), src.as_i8x16(), k)) }
13096}
13097
13098/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13099///
13100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi8&expand=1436)
13101#[inline]
13102#[target_feature(enable = "avx512f,avx512vl")]
13103#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13104#[cfg_attr(test, assert_instr(vpmovdb))]
13105pub fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, k)) }
13107}
13108
13109/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13110///
13111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi8&expand=1431)
13112#[inline]
13113#[target_feature(enable = "avx512f,avx512vl")]
13114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13115#[cfg_attr(test, assert_instr(vpmovdb))]
13116pub fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
13118}
13119
13120/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13121///
13122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi8&expand=1432)
13123#[inline]
13124#[target_feature(enable = "avx512f,avx512vl")]
13125#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13126#[cfg_attr(test, assert_instr(vpmovdb))]
13127pub fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdb128(a.as_i32x4(), src.as_i8x16(), k)) }
13129}
13130
13131/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13132///
13133/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi8&expand=1433)
13134#[inline]
13135#[target_feature(enable = "avx512f,avx512vl")]
13136#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13137#[cfg_attr(test, assert_instr(vpmovdb))]
13138pub fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, k)) }
13140}
13141
13142/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13143///
13144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi32&expand=1481)
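///
/// # Examples
///
/// A sketch of the truncation to 32-bit lanes (illustrative values; AVX-512F is
/// assumed to be available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: the required CPU features were just detected.
///         unsafe {
///             let a = _mm512_set1_epi64(0x1_0000_0005);
///             let _r = _mm512_cvtepi64_epi32(a);
///             // Truncation keeps only the low 32 bits: every 32-bit lane is 5.
///         }
///     }
/// }
/// ```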
13145#[inline]
13146#[target_feature(enable = "avx512f")]
13147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13148#[cfg_attr(test, assert_instr(vpmovqd))]
13149pub fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
13150 unsafe {
13151 let a: i64x8 = a.as_i64x8();
        transmute::<i32x8, _>(simd_cast(a))
13153 }
13154}
13155
13156/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13157///
13158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi32&expand=1482)
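///
/// A minimal illustrative sketch of the writemask behaviour (not from Intel's
/// documentation; the constants and mask are arbitrary):
///
/// ```ignore
/// let src = _mm256_set1_epi32(-1);
/// let a = _mm512_set1_epi64(7);
/// // Mask bit i selects the converted lane i; cleared bits copy the lane from `src`.
/// let r = _mm512_mask_cvtepi64_epi32(src, 0b0000_1111, a);
/// assert_eq!(_mm256_extract_epi32::<0>(r), 7);
/// assert_eq!(_mm256_extract_epi32::<7>(r), -1);
/// ```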
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
    unsafe {
        let convert: i32x8 = _mm512_cvtepi64_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi32&expand=1483)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
    unsafe {
        let convert: i32x8 = _mm512_cvtepi64_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi32&expand=1478)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
    unsafe {
        let a: i64x4 = a.as_i64x4();
        transmute::<i32x4, _>(simd_cast(a))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi32&expand=1479)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        let convert: i32x4 = _mm256_cvtepi64_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi32&expand=1480)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        let convert: i32x4 = _mm256_cvtepi64_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi32&expand=1475)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi32&expand=1476)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqd128(a.as_i64x2(), src.as_i32x4(), k)) }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi32&expand=1477)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, k)) }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi16&expand=1472)
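///
/// A minimal illustrative sketch (not from Intel's documentation; the constant is
/// arbitrary and `_mm512_set1_epi64` / `_mm_extract_epi16` are ordinary helpers):
///
/// ```ignore
/// let a = _mm512_set1_epi64(0x1_0002);
/// // Truncation keeps only the low 16 bits of each 64-bit lane.
/// let r = _mm512_cvtepi64_epi16(a);
/// assert_eq!(_mm_extract_epi16::<0>(r), 2);
/// ```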
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
    unsafe {
        let a: i64x8 = a.as_i64x8();
        transmute::<i16x8, _>(simd_cast(a))
    }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi16&expand=1473)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    unsafe {
        let convert: i16x8 = _mm512_cvtepi64_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
    }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi16&expand=1474)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
    unsafe {
        let convert: i16x8 = _mm512_cvtepi64_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
    }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi16&expand=1469)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi16&expand=1470)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovqw256(a.as_i64x4(), src.as_i16x8(), k)) }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi16&expand=1471)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, k)) }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi16&expand=1466)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi16&expand=1467)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqw128(a.as_i64x2(), src.as_i16x8(), k)) }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi16&expand=1468)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, k)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi8&expand=1490)
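///
/// A minimal illustrative sketch (not from Intel's documentation; the constant is
/// arbitrary and `_mm512_set1_epi64` / `_mm_extract_epi8` are ordinary helpers):
///
/// ```ignore
/// let a = _mm512_set1_epi64(257);
/// // Truncation keeps only the low 8 bits of each 64-bit lane: 257 & 0xFF == 1.
/// let r = _mm512_cvtepi64_epi8(a);
/// assert_eq!(_mm_extract_epi8::<0>(r), 1);
/// // Only the low 8 bytes of the result are used; the upper 8 bytes are zeroed.
/// assert_eq!(_mm_extract_epi8::<8>(r), 0);
/// ```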
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi8&expand=1491)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovqb(a.as_i64x8(), src.as_i8x16(), k)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi8&expand=1492)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, k)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi8&expand=1487)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi8&expand=1488)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovqb256(a.as_i64x4(), src.as_i8x16(), k)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi8&expand=1489)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, k)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi8&expand=1484)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi8&expand=1485)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqb128(a.as_i64x2(), src.as_i8x16(), k)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi8&expand=1486)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, k)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi16&expand=1819)
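///
/// A minimal illustrative sketch (not from Intel's documentation; the constant is
/// arbitrary and `_mm512_set1_epi32` / `_mm256_extract_epi16` are ordinary helpers):
///
/// ```ignore
/// let a = _mm512_set1_epi32(100_000);
/// // 100_000 does not fit in an i16, so signed saturation clamps it to i16::MAX.
/// let r = _mm512_cvtsepi32_epi16(a);
/// assert_eq!(_mm256_extract_epi16::<0>(r), 32767);
/// ```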
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, 0b11111111_11111111)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi16&expand=1820)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovsdw(a.as_i32x16(), src.as_i16x16(), k)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi16&expand=1819)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, k)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi16&expand=1816)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi16&expand=1817)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsdw256(a.as_i32x8(), src.as_i16x8(), k)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi16&expand=1818)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, k)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi16&expand=1813)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi16&expand=1814)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsdw128(a.as_i32x4(), src.as_i16x8(), k)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi16&expand=1815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, k)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi8&expand=1828)
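///
/// A minimal illustrative sketch (not from Intel's documentation; the constant is
/// arbitrary and `_mm512_set1_epi32` / `_mm_extract_epi8` are ordinary helpers):
///
/// ```ignore
/// let a = _mm512_set1_epi32(-1000);
/// // -1000 does not fit in an i8, so signed saturation clamps it to i8::MIN.
/// let r = _mm512_cvtsepi32_epi8(a);
/// assert_eq!(_mm_extract_epi8::<0>(r) as i8, i8::MIN);
/// ```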
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, 0b11111111_11111111)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi8&expand=1829)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsdb(a.as_i32x16(), src.as_i8x16(), k)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi8&expand=1830)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, k)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi8&expand=1825)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi8&expand=1826)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsdb256(a.as_i32x8(), src.as_i8x16(), k)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi8&expand=1827)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, k)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi8&expand=1822)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi8&expand=1823)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsdb128(a.as_i32x4(), src.as_i8x16(), k)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi8&expand=1824)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi32&expand=1852)
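///
/// A minimal illustrative sketch (not from Intel's documentation; the constant is
/// arbitrary and `_mm512_set1_epi64` / `_mm256_extract_epi32` are ordinary helpers):
///
/// ```ignore
/// let a = _mm512_set1_epi64(i64::MAX);
/// // i64::MAX does not fit in an i32, so signed saturation clamps it to i32::MAX.
/// let r = _mm512_cvtsepi64_epi32(a);
/// assert_eq!(_mm256_extract_epi32::<0>(r), i32::MAX);
/// ```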
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi32&expand=1853)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovsqd(a.as_i64x8(), src.as_i32x8(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi32&expand=1854)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi32&expand=1849)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi32&expand=1850)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqd256(a.as_i64x4(), src.as_i32x4(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi32&expand=1851)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi32&expand=1846)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi32&expand=1847)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqd128(a.as_i64x2(), src.as_i32x4(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi32&expand=1848)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi16&expand=1843)
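///
/// A minimal illustrative sketch (not from Intel's documentation; the constant is
/// arbitrary and `_mm512_set1_epi64` / `_mm_extract_epi16` are ordinary helpers):
///
/// ```ignore
/// let a = _mm512_set1_epi64(-5_000_000);
/// // -5_000_000 does not fit in an i16, so signed saturation clamps it to i16::MIN.
/// let r = _mm512_cvtsepi64_epi16(a);
/// assert_eq!(_mm_extract_epi16::<0>(r) as i16, i16::MIN);
/// ```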
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi16&expand=1844)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsqw(a.as_i64x8(), src.as_i16x8(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi16&expand=1845)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi16&expand=1840)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi16&expand=1841)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqw256(a.as_i64x4(), src.as_i16x8(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi16&expand=1842)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi16&expand=1837)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi16&expand=1838)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqw128(a.as_i64x2(), src.as_i16x8(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi16&expand=1839)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi8&expand=1861)
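///
/// A minimal illustrative sketch (not from Intel's documentation; the constant is
/// arbitrary and `_mm512_set1_epi64` / `_mm_extract_epi8` are ordinary helpers):
///
/// ```ignore
/// let a = _mm512_set1_epi64(1000);
/// // 1000 does not fit in an i8, so signed saturation clamps it to i8::MAX.
/// let r = _mm512_cvtsepi64_epi8(a);
/// assert_eq!(_mm_extract_epi8::<0>(r), 127);
/// // Only the low 8 bytes of the result are used; the upper 8 bytes are zeroed.
/// assert_eq!(_mm_extract_epi8::<8>(r), 0);
/// ```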
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi8&expand=1862)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsqb(a.as_i64x8(), src.as_i8x16(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi8&expand=1863)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi8&expand=1858)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi8&expand=1859)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqb256(a.as_i64x4(), src.as_i8x16(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi8&expand=1860)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi8&expand=1855)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi8&expand=1856)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqb128(a.as_i64x2(), src.as_i8x16(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi8&expand=1857)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi16&expand=2054)
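///
/// A minimal illustrative sketch (not from Intel's documentation; the constant is
/// arbitrary and `_mm512_set1_epi32` / `_mm256_extract_epi16` are ordinary helpers):
///
/// ```ignore
/// let a = _mm512_set1_epi32(70_000);
/// // 70_000 exceeds u16::MAX, so unsigned saturation clamps it to 0xFFFF.
/// let r = _mm512_cvtusepi32_epi16(a);
/// assert_eq!(_mm256_extract_epi16::<0>(r), 0xFFFF);
/// ```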
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, 0b11111111_11111111)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi16&expand=2055)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovusdw(a.as_u32x16(), src.as_u16x16(), k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi16&expand=2056)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi16&expand=2051)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi16&expand=2052)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusdw256(a.as_u32x8(), src.as_u16x8(), k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi16&expand=2053)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi16&expand=2048)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi16&expand=2049)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi16&expand=2050)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi8&expand=2063)
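///
/// A minimal illustrative sketch (not from Intel's documentation; the constant is
/// arbitrary and `_mm512_set1_epi32` / `_mm_extract_epi8` are ordinary helpers):
///
/// ```ignore
/// let a = _mm512_set1_epi32(300);
/// // 300 exceeds u8::MAX, so unsigned saturation clamps it to 255 (contrast with
/// // `_mm_cvtepi32_epi8`, where plain truncation of 300 yields 44).
/// let r = _mm512_cvtusepi32_epi8(a);
/// assert_eq!(_mm_extract_epi8::<0>(r), 255);
/// ```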
14063#[inline]
14064#[target_feature(enable = "avx512f")]
14065#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14066#[cfg_attr(test, assert_instr(vpmovusdb))]
14067pub fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
14068 unsafe { transmute(src:vpmovusdb(a.as_u32x16(), src:u8x16::ZERO, mask:0b11111111_11111111)) }
14069}
14070
14071/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14072///
14073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi8&expand=2064)
14074#[inline]
14075#[target_feature(enable = "avx512f")]
14076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14077#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k)) }
}
14081
14082/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14083///
14084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi8&expand=2065)
14085#[inline]
14086#[target_feature(enable = "avx512f")]
14087#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14088#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, k)) }
}
14092
14093/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14094///
14095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi8&expand=2060)
14096#[inline]
14097#[target_feature(enable = "avx512f,avx512vl")]
14098#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14099#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, 0b11111111)) }
}
14103
14104/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14105///
14106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi8&expand=2061)
14107#[inline]
14108#[target_feature(enable = "avx512f,avx512vl")]
14109#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14110#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k)) }
}
14114
14115/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14116///
14117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi8&expand=2062)
14118#[inline]
14119#[target_feature(enable = "avx512f,avx512vl")]
14120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14121#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, k)) }
}
14125
14126/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14127///
14128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi8&expand=2057)
14129#[inline]
14130#[target_feature(enable = "avx512f,avx512vl")]
14131#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14132#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, 0b11111111)) }
}
14136
14137/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14138///
14139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi8&expand=2058)
14140#[inline]
14141#[target_feature(enable = "avx512f,avx512vl")]
14142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14143#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k)) }
}
14147
14148/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14149///
14150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi8&expand=2059)
14151#[inline]
14152#[target_feature(enable = "avx512f,avx512vl")]
14153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14154#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, k)) }
}
14158
14159/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14160///
14161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi32&expand=2087)
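///
/// A minimal usage sketch (illustrative; not taken from Intel's documentation), assuming an
/// x86_64 target with AVX-512F detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if std::is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F availability was just verified.
///         unsafe {
///             let a = _mm512_set1_epi64(1_i64 << 40); // larger than u32::MAX
///             let r = _mm512_cvtusepi64_epi32(a);
///             // Out-of-range values saturate to u32::MAX.
///             assert_eq!(_mm256_extract_epi32::<0>(r) as u32, u32::MAX);
///         }
///     }
/// }
/// ```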
14162#[inline]
14163#[target_feature(enable = "avx512f")]
14164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14165#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, 0b11111111)) }
}
14169
14170/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14171///
14172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi32&expand=2088)
14173#[inline]
14174#[target_feature(enable = "avx512f")]
14175#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14176#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k)) }
}
14180
14181/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14182///
14183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi32&expand=2089)
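///
/// A minimal sketch of the zeromask behaviour (illustrative; not taken from Intel's
/// documentation), assuming an x86_64 target with AVX-512F detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if std::is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F availability was just verified.
///         unsafe {
///             let a = _mm512_set1_epi64(5);
///             // Only lane 0 is selected; lanes with a clear mask bit become zero.
///             let r = _mm512_maskz_cvtusepi64_epi32(0b0000_0001, a);
///             assert_eq!(_mm256_extract_epi32::<0>(r), 5);
///             assert_eq!(_mm256_extract_epi32::<1>(r), 0);
///         }
///     }
/// }
/// ```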
14184#[inline]
14185#[target_feature(enable = "avx512f")]
14186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14187#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, k)) }
}
14191
14192/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14193///
14194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi32&expand=2084)
14195#[inline]
14196#[target_feature(enable = "avx512f,avx512vl")]
14197#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14198#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, 0b11111111)) }
}
14202
14203/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14204///
14205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi32&expand=2085)
14206#[inline]
14207#[target_feature(enable = "avx512f,avx512vl")]
14208#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14209#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k)) }
}
14213
14214/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14215///
14216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi32&expand=2086)
14217#[inline]
14218#[target_feature(enable = "avx512f,avx512vl")]
14219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14220#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, k)) }
}
14224
14225/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14226///
14227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi32&expand=2081)
14228#[inline]
14229#[target_feature(enable = "avx512f,avx512vl")]
14230#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14231#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, 0b11111111)) }
}
14235
14236/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14237///
14238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi32&expand=2082)
14239#[inline]
14240#[target_feature(enable = "avx512f,avx512vl")]
14241#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14242#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k)) }
}
14246
14247/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14248///
14249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi32&expand=2083)
14250#[inline]
14251#[target_feature(enable = "avx512f,avx512vl")]
14252#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14253#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, k)) }
}
14257
14258/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14259///
14260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi16&expand=2078)
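///
/// A minimal usage sketch (illustrative; not taken from Intel's documentation), assuming an
/// x86_64 target with AVX-512F detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if std::is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F availability was just verified.
///         unsafe {
///             let a = _mm512_setr_epi64(1, 2, 3, 70_000, 0, 0, 0, 0);
///             let r = _mm512_cvtusepi64_epi16(a);
///             assert_eq!(_mm_extract_epi16::<0>(r), 1); // in range: unchanged
///             assert_eq!(_mm_extract_epi16::<3>(r), 65535); // saturated to u16::MAX
///         }
///     }
/// }
/// ```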
14261#[inline]
14262#[target_feature(enable = "avx512f")]
14263#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14264#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, 0b11111111)) }
}
14268
14269/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14270///
14271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi16&expand=2079)
14272#[inline]
14273#[target_feature(enable = "avx512f")]
14274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14275#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k)) }
}
14279
14280/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14281///
14282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi16&expand=2080)
14283#[inline]
14284#[target_feature(enable = "avx512f")]
14285#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14286#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, k)) }
}
14290
14291/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14292///
14293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi16&expand=2075)
14294#[inline]
14295#[target_feature(enable = "avx512f,avx512vl")]
14296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14297#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, 0b11111111)) }
}
14301
14302/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14303///
14304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi16&expand=2076)
14305#[inline]
14306#[target_feature(enable = "avx512f,avx512vl")]
14307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14308#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k)) }
}
14312
14313/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14314///
14315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi16&expand=2077)
14316#[inline]
14317#[target_feature(enable = "avx512f,avx512vl")]
14318#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14319#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, k)) }
}
14323
14324/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14325///
14326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi16&expand=2072)
14327#[inline]
14328#[target_feature(enable = "avx512f,avx512vl")]
14329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14330#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, 0b11111111)) }
}
14334
14335/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14336///
14337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi16&expand=2073)
14338#[inline]
14339#[target_feature(enable = "avx512f,avx512vl")]
14340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14341#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k)) }
}
14345
14346/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14347///
14348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi16&expand=2074)
14349#[inline]
14350#[target_feature(enable = "avx512f,avx512vl")]
14351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14352#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, k)) }
}
14356
14357/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14358///
14359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi8&expand=2096)
14360#[inline]
14361#[target_feature(enable = "avx512f")]
14362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14363#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, 0b11111111)) }
}
14367
14368/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14369///
14370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi8&expand=2097)
14371#[inline]
14372#[target_feature(enable = "avx512f")]
14373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14374#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k)) }
}
14378
14379/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14380///
14381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi8&expand=2098)
14382#[inline]
14383#[target_feature(enable = "avx512f")]
14384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14385#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, k)) }
}
14389
14390/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14391///
14392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi8&expand=2093)
14393#[inline]
14394#[target_feature(enable = "avx512f,avx512vl")]
14395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14396#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, 0b11111111)) }
}
14400
14401/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14402///
14403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi8&expand=2094)
14404#[inline]
14405#[target_feature(enable = "avx512f,avx512vl")]
14406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14407#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k)) }
}
14411
14412/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14413///
14414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi8&expand=2095)
14415#[inline]
14416#[target_feature(enable = "avx512f,avx512vl")]
14417#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14418#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, k)) }
}
14422
14423/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14424///
14425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi8&expand=2090)
14426#[inline]
14427#[target_feature(enable = "avx512f,avx512vl")]
14428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14429#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, 0b11111111)) }
}
14433
14434/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14435///
14436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi8&expand=2091)
14437#[inline]
14438#[target_feature(enable = "avx512f,avx512vl")]
14439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14440#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k)) }
}
14444
14445/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14446///
14447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi8&expand=2092)
14448#[inline]
14449#[target_feature(enable = "avx512f,avx512vl")]
14450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14451#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, k)) }
}
14455
14456/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
14457///
14458/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
14459/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14460/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14461/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14462/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14463/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14464///
14465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi32&expand=1335)
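///
/// A minimal sketch of how the rounding parameter changes the result (illustrative; not taken
/// from Intel's documentation), assuming an x86_64 target with AVX-512F detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if std::is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F availability was just verified.
///         unsafe {
///             let a = _mm512_set1_ps(1.5);
///             let nearest =
///                 _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
///             let truncated =
///                 _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
///             assert_eq!(_mm512_cvtsi512_si32(nearest), 2); // ties round to even
///             assert_eq!(_mm512_cvtsi512_si32(truncated), 1);
///         }
///     }
/// }
/// ```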
14466#[inline]
14467#[target_feature(enable = "avx512f")]
14468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14469#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14470#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundps_epi32<const ROUNDING: i32>(a: __m512) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2dq(a, i32x16::ZERO, 0b11111111_11111111, ROUNDING);
        transmute(r)
    }
}
14479
14480/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14481///
14482/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14483/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14484/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14485/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14486/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14487/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14488///
14489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi32&expand=1336)
14490#[inline]
14491#[target_feature(enable = "avx512f")]
14492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14493#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14494#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundps_epi32<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let src = src.as_i32x16();
        let r = vcvtps2dq(a, src, k, ROUNDING);
        transmute(r)
    }
}
14508
14509/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14510///
14511/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14512/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14513/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14514/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14515/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14516/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14517///
14518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi32&expand=1337)
14519#[inline]
14520#[target_feature(enable = "avx512f")]
14521#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14522#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14523#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundps_epi32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2dq(a, i32x16::ZERO, k, ROUNDING);
        transmute(r)
    }
}
14532
14533/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14534///
14535/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14536/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14537/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14538/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14539/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14540/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14541///
14542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu32&expand=1341)
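///
/// A minimal usage sketch (illustrative; not taken from Intel's documentation), assuming an
/// x86_64 target with AVX-512F detected at runtime. The value below fits in a u32 but not in an
/// i32, which is what distinguishes this intrinsic from the signed conversion:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if std::is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F availability was just verified.
///         unsafe {
///             let a = _mm512_set1_ps(3_000_000_000.0); // exactly representable in f32
///             let r =
///                 _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
///             assert_eq!(_mm512_cvtsi512_si32(r) as u32, 3_000_000_000);
///         }
///     }
/// }
/// ```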
14543#[inline]
14544#[target_feature(enable = "avx512f")]
14545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14546#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14547#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundps_epu32<const ROUNDING: i32>(a: __m512) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2udq(a, u32x16::ZERO, 0b11111111_11111111, ROUNDING);
        transmute(r)
    }
}
14556
14557/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14558///
14559/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14560/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14561/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14562/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14563/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14564/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14565///
14566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu32&expand=1342)
14567#[inline]
14568#[target_feature(enable = "avx512f")]
14569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14570#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14571#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundps_epu32<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let src = src.as_u32x16();
        let r = vcvtps2udq(a, src, k, ROUNDING);
        transmute(r)
    }
}
14585
14586/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14587///
14588/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14589/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14590/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14591/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14592/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14593/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14594///
14595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu32&expand=1343)
14596#[inline]
14597#[target_feature(enable = "avx512f")]
14598#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14599#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14600#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundps_epu32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2udq(a, u32x16::ZERO, k, ROUNDING);
        transmute(r)
    }
}
14609
14610/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\
14611/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14612///
14613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_pd&expand=1347)
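///
/// A minimal usage sketch (illustrative; not taken from Intel's documentation), assuming an
/// x86_64 target with AVX-512F detected at runtime. Widening to double precision is exact, so
/// the SAE parameter only controls exception suppression:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if std::is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F availability was just verified.
///         unsafe {
///             let a = _mm256_set1_ps(1.25);
///             let r = _mm512_cvt_roundps_pd::<{ _MM_FROUND_NO_EXC }>(a);
///             assert_eq!(_mm512_cvtsd_f64(r), 1.25);
///         }
///     }
/// }
/// ```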
14614#[inline]
14615#[target_feature(enable = "avx512f")]
14616#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14617#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14618#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundps_pd<const SAE: i32>(a: __m256) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x8();
        let r = vcvtps2pd(a, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}
14627
14628/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14629/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14630///
14631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_pd&expand=1336)
14632#[inline]
14633#[target_feature(enable = "avx512f")]
14634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14635#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14636#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundps_pd<const SAE: i32>(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x8();
        let src = src.as_f64x8();
        let r = vcvtps2pd(a, src, k, SAE);
        transmute(r)
    }
}
14646
14647/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14648/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14649///
14650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_pd&expand=1337)
14651#[inline]
14652#[target_feature(enable = "avx512f")]
14653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14654#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14655#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundps_pd<const SAE: i32>(k: __mmask8, a: __m256) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x8();
        let r = vcvtps2pd(a, f64x8::ZERO, k, SAE);
        transmute(r)
    }
}
14664
14665/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
14666///
14667/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14668/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14669/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14670/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14671/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14672/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14673///
14674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi32&expand=1315)
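///
/// A minimal sketch of how the rounding parameter changes the result (illustrative; not taken
/// from Intel's documentation), assuming an x86_64 target with AVX-512F detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if std::is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F availability was just verified.
///         unsafe {
///             let a = _mm512_set1_pd(-2.5);
///             let nearest =
///                 _mm512_cvt_roundpd_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
///             let floor =
///                 _mm512_cvt_roundpd_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
///             assert_eq!(_mm256_extract_epi32::<0>(nearest), -2); // ties round to even
///             assert_eq!(_mm256_extract_epi32::<0>(floor), -3);
///         }
///     }
/// }
/// ```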
14675#[inline]
14676#[target_feature(enable = "avx512f")]
14677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14678#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14679#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundpd_epi32<const ROUNDING: i32>(a: __m512d) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2dq(a, i32x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}
14688
14689/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14690///
14691/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14692/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14693/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14694/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14695/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14696/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14697///
14698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi32&expand=1316)
14699#[inline]
14700#[target_feature(enable = "avx512f")]
14701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14702#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14703#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundpd_epi32<const ROUNDING: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512d,
) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let src = src.as_i32x8();
        let r = vcvtpd2dq(a, src, k, ROUNDING);
        transmute(r)
    }
}
14717
14718/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14719///
14720/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14721/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14722/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14723/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14724/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14725/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14726///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi32&expand=1317)
14728#[inline]
14729#[target_feature(enable = "avx512f")]
14730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14731#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14732#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundpd_epi32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2dq(a, i32x8::ZERO, k, ROUNDING);
        transmute(r)
    }
}
14741
14742/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14743///
14744/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14745/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14746/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14747/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14748/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14749/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14750///
14751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu32&expand=1321)
14752#[inline]
14753#[target_feature(enable = "avx512f")]
14754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14755#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14756#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundpd_epu32<const ROUNDING: i32>(a: __m512d) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2udq(a, u32x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}
14765
14766/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14767///
14768/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14769/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14770/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14771/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14772/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14773/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14774///
14775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu32&expand=1322)
14776#[inline]
14777#[target_feature(enable = "avx512f")]
14778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14779#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14780#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundpd_epu32<const ROUNDING: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512d,
) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let src = src.as_u32x8();
        let r = vcvtpd2udq(a, src, k, ROUNDING);
        transmute(r)
    }
}
14794
14795/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14796///
14797/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14798/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14799/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14800/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14801/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14802/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14803///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu32&expand=1323)
14805#[inline]
14806#[target_feature(enable = "avx512f")]
14807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14808#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14809#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundpd_epu32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2udq(a, u32x8::ZERO, k, ROUNDING);
        transmute(r)
    }
}
14818
14819/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14820///
14821/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14822/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14823/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14824/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14825/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14826/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14827///
14828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_ps&expand=1327)
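///
/// A minimal sketch of how the rounding parameter changes the narrowing result (illustrative;
/// not taken from Intel's documentation), assuming an x86_64 target with AVX-512F detected at
/// runtime. The input lies strictly between two adjacent f32 values, so the mode decides which
/// neighbour is produced:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if std::is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F availability was just verified.
///         unsafe {
///             let a = _mm512_set1_pd(1.0 + f64::EPSILON);
///             let down = _mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
///             let up = _mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
///             assert_eq!(_mm256_cvtss_f32(down), 1.0);
///             assert_eq!(_mm256_cvtss_f32(up), 1.0 + f32::EPSILON);
///         }
///     }
/// }
/// ```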
14829#[inline]
14830#[target_feature(enable = "avx512f")]
14831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14832#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14833#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundpd_ps<const ROUNDING: i32>(a: __m512d) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2ps(a, f32x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}
14842
14843/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14844///
14845/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14846/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14847/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14848/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14849/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14850/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14851///
14852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_ps&expand=1328)
14853#[inline]
14854#[target_feature(enable = "avx512f")]
14855#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14856#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14857#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundpd_ps<const ROUNDING: i32>(
    src: __m256,
    k: __mmask8,
    a: __m512d,
) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let src = src.as_f32x8();
        let r = vcvtpd2ps(a, src, k, ROUNDING);
        transmute(r)
    }
}
14871
14872/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14873///
14874/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14875/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14876/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14877/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14878/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14879/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14880///
14881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_ps&expand=1329)
14882#[inline]
14883#[target_feature(enable = "avx512f")]
14884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14885#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14886#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundpd_ps<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2ps(a, f32x8::ZERO, k, ROUNDING);
        transmute(r)
    }
}
14895
14896/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14897///
14898/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14899/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14900/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14901/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14902/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14903/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14904///
14905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi32_ps&expand=1294)
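///
/// A minimal sketch of how the rounding parameter matters for integers that have no exact f32
/// representation (illustrative; not taken from Intel's documentation), assuming an x86_64
/// target with AVX-512F detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if std::is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F availability was just verified.
///         unsafe {
///             let a = _mm512_set1_epi32(16_777_217); // 2^24 + 1, not exactly representable
///             let down = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
///             let up = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
///             assert_eq!(_mm512_cvtss_f32(down), 16_777_216.0);
///             assert_eq!(_mm512_cvtss_f32(up), 16_777_218.0);
///         }
///     }
/// }
/// ```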
14906#[inline]
14907#[target_feature(enable = "avx512f")]
14908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14909#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14910#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundepi32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_i32x16();
        let r = vcvtdq2ps(a, ROUNDING);
        transmute(r)
    }
}
14919
14920/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14921///
14922/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14923/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14924/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14925/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14926/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14927/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14928///
14929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi32_ps&expand=1295)
14930#[inline]
14931#[target_feature(enable = "avx512f")]
14932#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14933#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14934#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundepi32_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512i,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_i32x16();
        let r = vcvtdq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}
14947
14948/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14949///
14950/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14951/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14952/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14953/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14954/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14955/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14956///
14957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi32_ps&expand=1296)
14958#[inline]
14959#[target_feature(enable = "avx512f")]
14960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14961#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14962#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundepi32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_i32x16();
        let r = vcvtdq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}
14971
14972/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14973///
14974/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14975/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14976/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14977/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14978/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14979/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14980///
14981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu32_ps&expand=1303)
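///
/// A minimal usage sketch (illustrative; not taken from Intel's documentation), assuming an
/// x86_64 target with AVX-512F detected at runtime. The all-ones bit pattern is read as
/// u32::MAX rather than -1, which is what distinguishes this intrinsic from the signed
/// conversion:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if std::is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F availability was just verified.
///         unsafe {
///             let a = _mm512_set1_epi32(-1); // bit pattern of u32::MAX
///             let r =
///                 _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
///             assert_eq!(_mm512_cvtss_f32(r), 4_294_967_296.0); // u32::MAX rounded to nearest f32
///         }
///     }
/// }
/// ```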
14982#[inline]
14983#[target_feature(enable = "avx512f")]
14984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14985#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
14986#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundepu32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_u32x16();
        let r = vcvtudq2ps(a, ROUNDING);
        transmute(r)
    }
}
14995
14996/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14997///
14998/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14999/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15000/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15001/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15002/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15003/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15004///
15005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu32_ps&expand=1304)
15006#[inline]
15007#[target_feature(enable = "avx512f")]
15008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15009#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15010#[rustc_legacy_const_generics(3)]
15011pub fn _mm512_mask_cvt_roundepu32_ps<const ROUNDING: i32>(
15012 src: __m512,
15013 k: __mmask16,
15014 a: __m512i,
15015) -> __m512 {
15016 unsafe {
15017 static_assert_rounding!(ROUNDING);
15018 let a: u32x16 = a.as_u32x16();
15019 let r: f32x16 = vcvtudq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
15021 }
15022}
15023
15024/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15025///
15026/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15027/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15028/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15029/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15030/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15031/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15032///
15033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu32_ps&expand=1305)
15034#[inline]
15035#[target_feature(enable = "avx512f")]
15036#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15037#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15038#[rustc_legacy_const_generics(2)]
15039pub fn _mm512_maskz_cvt_roundepu32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
15040 unsafe {
15041 static_assert_rounding!(ROUNDING);
15042 let a: u32x16 = a.as_u32x16();
15043 let r: f32x16 = vcvtudq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
15045 }
15046}
15047
15048/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15049/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15050/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15051/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15052/// * [`_MM_FROUND_TO_POS_INF`] // round up
15053/// * [`_MM_FROUND_TO_ZERO`] // truncate
15054/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15055/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15056/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15057/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15058/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15059/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15060///
15061/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354)
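///
/// A minimal usage sketch (illustrative, not from Intel's documentation); it round-trips a
/// value that is exactly representable in half precision, assuming runtime `avx512f` support:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_set1_ps(1.5);
///             // 1.5 fits exactly in an f16, so converting down and back up is lossless.
///             let h = _mm512_cvt_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
///             let r = _mm512_cvtph_ps(h);
///             assert_eq!(_mm512_cmpeq_ps_mask(r, a), 0xffff);
///         }
///     }
/// }
/// ```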
15062#[inline]
15063#[target_feature(enable = "avx512f")]
15064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15065#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15066#[rustc_legacy_const_generics(1)]
15067pub fn _mm512_cvt_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
15068 unsafe {
15069 static_assert_extended_rounding!(ROUNDING);
15070 let a: f32x16 = a.as_f32x16();
        let r: i16x16 = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111);
        transmute(r)
15073 }
15074}
15075
15076/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15077/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15078/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15079/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15080/// * [`_MM_FROUND_TO_POS_INF`] // round up
15081/// * [`_MM_FROUND_TO_ZERO`] // truncate
15082/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15083/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15084/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15085/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15086/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15087/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15088///
15089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355)
15090#[inline]
15091#[target_feature(enable = "avx512f")]
15092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15093#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15094#[rustc_legacy_const_generics(3)]
15095pub fn _mm512_mask_cvt_roundps_ph<const ROUNDING: i32>(
15096 src: __m256i,
15097 k: __mmask16,
15098 a: __m512,
15099) -> __m256i {
15100 unsafe {
15101 static_assert_extended_rounding!(ROUNDING);
15102 let a: f32x16 = a.as_f32x16();
15103 let src: i16x16 = src.as_i16x16();
        let r: i16x16 = vcvtps2ph(a, ROUNDING, src, k);
        transmute(r)
15106 }
15107}
15108
15109/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15110/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15111/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15112/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15113/// * [`_MM_FROUND_TO_POS_INF`] // round up
15114/// * [`_MM_FROUND_TO_ZERO`] // truncate
15115/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15116/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15117/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15118/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15119/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15120/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15121///
15122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356)
15123#[inline]
15124#[target_feature(enable = "avx512f")]
15125#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15126#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15127#[rustc_legacy_const_generics(2)]
15128pub fn _mm512_maskz_cvt_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
15129 unsafe {
15130 static_assert_extended_rounding!(ROUNDING);
15131 let a: f32x16 = a.as_f32x16();
        let r: i16x16 = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k);
        transmute(r)
15134 }
15135}
15136
15137/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15138/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:
15139/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15140/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15141/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15142/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15143/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15144///
15145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvt_roundps_ph&expand=1352)
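///
/// A minimal usage sketch (illustrative, not from Intel's documentation); it checks the raw
/// half-precision bit patterns so that only `avx512f`/`avx512vl` support is needed at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///         unsafe {
///             let a = _mm256_set1_ps(1.5);      // converts to the f16 bit pattern 0x3E00
///             let src = _mm_set1_epi16(0x3C00); // f16 bit pattern of 1.0
///             let h = _mm256_mask_cvt_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
///                 src, 0b0000_1111, a,
///             );
///             // Lanes selected by the mask hold the converted 1.5, the rest keep `src`.
///             assert_eq!(_mm_cmpeq_epi32_mask(h, _mm_set1_epi32(0x3E00_3E00)), 0b0011);
///             assert_eq!(_mm_cmpeq_epi32_mask(h, _mm_set1_epi32(0x3C00_3C00)), 0b1100);
///         }
///     }
/// }
/// ```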
15146#[inline]
15147#[target_feature(enable = "avx512f,avx512vl")]
15148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15149#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15150#[rustc_legacy_const_generics(3)]
15151pub fn _mm256_mask_cvt_roundps_ph<const IMM8: i32>(
15152 src: __m128i,
15153 k: __mmask8,
15154 a: __m256,
15155) -> __m128i {
15156 unsafe {
15157 static_assert_uimm_bits!(IMM8, 8);
15158 let a: f32x8 = a.as_f32x8();
15159 let src: i16x8 = src.as_i16x8();
        let r: i16x8 = vcvtps2ph256(a, IMM8, src, k);
        transmute(r)
15162 }
15163}
15164
15165/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15166/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15167/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15168/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15169/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15170/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15171/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15172///
15173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvt_roundps_ph&expand=1353)
15174#[inline]
15175#[target_feature(enable = "avx512f,avx512vl")]
15176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15177#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15178#[rustc_legacy_const_generics(2)]
15179pub fn _mm256_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15180 unsafe {
15181 static_assert_uimm_bits!(IMM8, 8);
15182 let a: f32x8 = a.as_f32x8();
        let r: i16x8 = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
        transmute(r)
15185 }
15186}
15187
15188/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15189/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15190/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15191/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15192/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15193/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15194/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15195///
15196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvt_roundps_ph&expand=1350)
15197#[inline]
15198#[target_feature(enable = "avx512f,avx512vl")]
15199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15200#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15201#[rustc_legacy_const_generics(3)]
15202pub fn _mm_mask_cvt_roundps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15203 unsafe {
15204 static_assert_uimm_bits!(IMM8, 8);
15205 let a: f32x4 = a.as_f32x4();
15206 let src: i16x8 = src.as_i16x8();
        let r: i16x8 = vcvtps2ph128(a, IMM8, src, k);
        transmute(r)
15209 }
15210}
15211
15212/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15213/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15214/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15215/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15216/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15217/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15218/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15219///
15220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvt_roundps_ph&expand=1351)
15221#[inline]
15222#[target_feature(enable = "avx512f,avx512vl")]
15223#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15224#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15225#[rustc_legacy_const_generics(2)]
15226pub fn _mm_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15227 unsafe {
15228 static_assert_uimm_bits!(IMM8, 8);
15229 let a: f32x4 = a.as_f32x4();
        let r: i16x8 = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
        transmute(r)
15232 }
15233}
15234
15235/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15236/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15237/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15238/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15239/// * [`_MM_FROUND_TO_POS_INF`] // round up
15240/// * [`_MM_FROUND_TO_ZERO`] // truncate
15241/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15242/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15243/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15244/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15245/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15246/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15247///
15248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778)
15249#[inline]
15250#[target_feature(enable = "avx512f")]
15251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15252#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15253#[rustc_legacy_const_generics(1)]
15254pub fn _mm512_cvtps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
15255 unsafe {
15256 static_assert_extended_rounding!(ROUNDING);
15257 let a: f32x16 = a.as_f32x16();
        let r: i16x16 = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111);
        transmute(r)
15260 }
15261}
15262
15263/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15264/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15265/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15266/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15267/// * [`_MM_FROUND_TO_POS_INF`] // round up
15268/// * [`_MM_FROUND_TO_ZERO`] // truncate
15269/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15270/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15271/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15272/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15273/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15274/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15275///
15276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779)
15277#[inline]
15278#[target_feature(enable = "avx512f")]
15279#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15280#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15281#[rustc_legacy_const_generics(3)]
15282pub fn _mm512_mask_cvtps_ph<const ROUNDING: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i {
15283 unsafe {
15284 static_assert_extended_rounding!(ROUNDING);
15285 let a: f32x16 = a.as_f32x16();
15286 let src: i16x16 = src.as_i16x16();
        let r: i16x16 = vcvtps2ph(a, ROUNDING, src, k);
        transmute(r)
15289 }
15290}
15291
15292/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15293/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15294/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15295/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15296/// * [`_MM_FROUND_TO_POS_INF`] // round up
15297/// * [`_MM_FROUND_TO_ZERO`] // truncate
15298/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15299/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15300/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15301/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15302/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15303/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15304///
15305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780)
15306#[inline]
15307#[target_feature(enable = "avx512f")]
15308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15309#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15310#[rustc_legacy_const_generics(2)]
15311pub fn _mm512_maskz_cvtps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
15312 unsafe {
15313 static_assert_extended_rounding!(ROUNDING);
15314 let a: f32x16 = a.as_f32x16();
        let r: i16x16 = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k);
        transmute(r)
15317 }
15318}
15319
15320/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15321/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15322/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15323/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15324/// * [`_MM_FROUND_TO_POS_INF`] : round up
15325/// * [`_MM_FROUND_TO_ZERO`] : truncate
15326/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15327///
15328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_ph&expand=1776)
15329#[inline]
15330#[target_feature(enable = "avx512f,avx512vl")]
15331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15332#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15333#[rustc_legacy_const_generics(3)]
15334pub fn _mm256_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m256) -> __m128i {
15335 unsafe {
15336 static_assert_uimm_bits!(IMM8, 8);
15337 let a: f32x8 = a.as_f32x8();
15338 let src: i16x8 = src.as_i16x8();
        let r: i16x8 = vcvtps2ph256(a, IMM8, src, k);
        transmute(r)
15341 }
15342}
15343
15344/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15345/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15346/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15347/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15348/// * [`_MM_FROUND_TO_POS_INF`] : round up
15349/// * [`_MM_FROUND_TO_ZERO`] : truncate
15350/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15351///
15352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_ph&expand=1777)
15353#[inline]
15354#[target_feature(enable = "avx512f,avx512vl")]
15355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15356#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15357#[rustc_legacy_const_generics(2)]
15358pub fn _mm256_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15359 unsafe {
15360 static_assert_uimm_bits!(IMM8, 8);
15361 let a: f32x8 = a.as_f32x8();
        let r: i16x8 = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
        transmute(r)
15364 }
15365}
15366
15367/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15368/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15369/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15370/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15371/// * [`_MM_FROUND_TO_POS_INF`] : round up
15372/// * [`_MM_FROUND_TO_ZERO`] : truncate
15373/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15374///
15375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_ph&expand=1773)
15376#[inline]
15377#[target_feature(enable = "avx512f,avx512vl")]
15378#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15379#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15380#[rustc_legacy_const_generics(3)]
15381pub fn _mm_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15382 unsafe {
15383 static_assert_uimm_bits!(IMM8, 8);
15384 let a: f32x4 = a.as_f32x4();
15385 let src: i16x8 = src.as_i16x8();
        let r: i16x8 = vcvtps2ph128(a, IMM8, src, k);
        transmute(r)
15388 }
15389}
15390
15391/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15392/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15393/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15394/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15395/// * [`_MM_FROUND_TO_POS_INF`] : round up
15396/// * [`_MM_FROUND_TO_ZERO`] : truncate
15397/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15398///
15399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_ph&expand=1774)
15400#[inline]
15401#[target_feature(enable = "avx512f,avx512vl")]
15402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15403#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15404#[rustc_legacy_const_generics(2)]
15405pub fn _mm_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15406 unsafe {
15407 static_assert_uimm_bits!(IMM8, 8);
15408 let a: f32x4 = a.as_f32x4();
        let r: i16x8 = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
        transmute(r)
15411 }
15412}
15413
15414/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15415/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15416///
15417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundph_ps&expand=1332)
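///
/// A minimal usage sketch (illustrative, not from Intel's documentation); the input is built
/// directly from the f16 bit pattern of 3.0, assuming runtime `avx512f` support:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm256_set1_epi16(0x4200); // sixteen f16 values of 3.0
///             let r = _mm512_cvt_roundph_ps::<{ _MM_FROUND_NO_EXC }>(a);
///             assert_eq!(_mm512_cmpeq_ps_mask(r, _mm512_set1_ps(3.0)), 0xffff);
///         }
///     }
/// }
/// ```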
15418#[inline]
15419#[target_feature(enable = "avx512f")]
15420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15421#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15422#[rustc_legacy_const_generics(1)]
15423pub fn _mm512_cvt_roundph_ps<const SAE: i32>(a: __m256i) -> __m512 {
15424 unsafe {
15425 static_assert_sae!(SAE);
15426 let a: i16x16 = a.as_i16x16();
        let r: f32x16 = vcvtph2ps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
15429 }
15430}
15431
15432/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15433/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15434///
15435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundph_ps&expand=1333)
15436#[inline]
15437#[target_feature(enable = "avx512f")]
15438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15439#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15440#[rustc_legacy_const_generics(3)]
15441pub fn _mm512_mask_cvt_roundph_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15442 unsafe {
15443 static_assert_sae!(SAE);
15444 let a: i16x16 = a.as_i16x16();
15445 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vcvtph2ps(a, src, k, SAE);
        transmute(r)
15448 }
15449}
15450
15451/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15452/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15453///
15454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundph_ps&expand=1334)
15455#[inline]
15456#[target_feature(enable = "avx512f")]
15457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15458#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15459#[rustc_legacy_const_generics(2)]
15460pub fn _mm512_maskz_cvt_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256i) -> __m512 {
15461 unsafe {
15462 static_assert_sae!(SAE);
15463 let a: i16x16 = a.as_i16x16();
        let r: f32x16 = vcvtph2ps(a, f32x16::ZERO, k, SAE);
        transmute(r)
15466 }
15467}
15468
15469/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
15470///
15471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtph_ps&expand=1723)
15472#[inline]
15473#[target_feature(enable = "avx512f")]
15474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15475#[cfg_attr(test, assert_instr(vcvtph2ps))]
15476pub fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
15477 unsafe {
        transmute(vcvtph2ps(
            a.as_i16x16(),
            f32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_NO_EXC,
        ))
15484 }
15485}
15486
15487/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15488///
15489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtph_ps&expand=1724)
15490#[inline]
15491#[target_feature(enable = "avx512f")]
15492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15493#[cfg_attr(test, assert_instr(vcvtph2ps))]
15494pub fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15495 unsafe {
        transmute(vcvtph2ps(
            a.as_i16x16(),
            src.as_f32x16(),
            k,
            _MM_FROUND_NO_EXC,
        ))
15502 }
15503}
15504
15505/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15506///
15507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtph_ps&expand=1725)
15508#[inline]
15509#[target_feature(enable = "avx512f")]
15510#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15511#[cfg_attr(test, assert_instr(vcvtph2ps))]
15512pub fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
    unsafe { transmute(vcvtph2ps(a.as_i16x16(), f32x16::ZERO, k, _MM_FROUND_NO_EXC)) }
15514}
15515
15516/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15517///
15518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtph_ps&expand=1721)
15519#[inline]
15520#[target_feature(enable = "avx512f,avx512vl")]
15521#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15522#[cfg_attr(test, assert_instr(vcvtph2ps))]
15523pub fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 {
15524 unsafe {
15525 let convert: __m256 = _mm256_cvtph_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x8(), src.as_f32x8()))
15527 }
15528}
15529
15530/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15531///
15532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtph_ps&expand=1722)
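///
/// A minimal usage sketch (illustrative, not from Intel's documentation), assuming runtime
/// `avx512f` and `avx512vl` support:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///         unsafe {
///             let h = _mm_set1_epi16(0x3C00); // eight f16 values of 1.0
///             let r = _mm256_maskz_cvtph_ps(0b0000_1111, h);
///             // The low four lanes are converted, the high four are zeroed.
///             assert_eq!(_mm256_cmp_ps_mask::<{ _CMP_EQ_OQ }>(r, _mm256_set1_ps(1.0)), 0b0000_1111);
///             assert_eq!(_mm256_cmp_ps_mask::<{ _CMP_EQ_OQ }>(r, _mm256_set1_ps(0.0)), 0b1111_0000);
///         }
///     }
/// }
/// ```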
15533#[inline]
15534#[target_feature(enable = "avx512f,avx512vl")]
15535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15536#[cfg_attr(test, assert_instr(vcvtph2ps))]
15537pub fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 {
15538 unsafe {
15539 let convert: __m256 = _mm256_cvtph_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x8(), f32x8::ZERO))
15541 }
15542}
15543
15544/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15545///
15546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtph_ps&expand=1718)
15547#[inline]
15548#[target_feature(enable = "avx512f,avx512vl")]
15549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15550#[cfg_attr(test, assert_instr(vcvtph2ps))]
15551pub fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
15552 unsafe {
15553 let convert: __m128 = _mm_cvtph_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
15555 }
15556}
15557
15558/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15559///
15560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtph_ps&expand=1719)
15561#[inline]
15562#[target_feature(enable = "avx512f,avx512vl")]
15563#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15564#[cfg_attr(test, assert_instr(vcvtph2ps))]
15565pub fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 {
15566 unsafe {
15567 let convert: __m128 = _mm_cvtph_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
15569 }
15570}
15571
15572/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15573/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15574///
15575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi32&expand=1916)
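///
/// A minimal usage sketch (illustrative, not from Intel's documentation), assuming runtime
/// `avx512f` support; truncation always rounds toward zero:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_set1_ps(-1.9);
///             let r = _mm512_cvtt_roundps_epi32::<{ _MM_FROUND_NO_EXC }>(a);
///             // -1.9 truncates toward zero to -1 in every lane.
///             assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(-1)), 0xffff);
///         }
///     }
/// }
/// ```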
15576#[inline]
15577#[target_feature(enable = "avx512f")]
15578#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15579#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15580#[rustc_legacy_const_generics(1)]
15581pub fn _mm512_cvtt_roundps_epi32<const SAE: i32>(a: __m512) -> __m512i {
15582 unsafe {
15583 static_assert_sae!(SAE);
15584 let a: f32x16 = a.as_f32x16();
        let r: i32x16 = vcvttps2dq(a, i32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
15587 }
15588}
15589
15590/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15591/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15592///
15593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi32&expand=1917)
15594#[inline]
15595#[target_feature(enable = "avx512f")]
15596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15597#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15598#[rustc_legacy_const_generics(3)]
15599pub fn _mm512_mask_cvtt_roundps_epi32<const SAE: i32>(
15600 src: __m512i,
15601 k: __mmask16,
15602 a: __m512,
15603) -> __m512i {
15604 unsafe {
15605 static_assert_sae!(SAE);
15606 let a: f32x16 = a.as_f32x16();
15607 let src: i32x16 = src.as_i32x16();
        let r: i32x16 = vcvttps2dq(a, src, k, SAE);
        transmute(r)
15610 }
15611}
15612
15613/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15614/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15615///
15616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918)
15617#[inline]
15618#[target_feature(enable = "avx512f")]
15619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15620#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15621#[rustc_legacy_const_generics(2)]
15622pub fn _mm512_maskz_cvtt_roundps_epi32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
15623 unsafe {
15624 static_assert_sae!(SAE);
15625 let a: f32x16 = a.as_f32x16();
        let r: i32x16 = vcvttps2dq(a, i32x16::ZERO, k, SAE);
        transmute(r)
15628 }
15629}
15630
15631/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
15632/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15633///
15634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu32&expand=1922)
15635#[inline]
15636#[target_feature(enable = "avx512f")]
15637#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15638#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15639#[rustc_legacy_const_generics(1)]
15640pub fn _mm512_cvtt_roundps_epu32<const SAE: i32>(a: __m512) -> __m512i {
15641 unsafe {
15642 static_assert_sae!(SAE);
15643 let a: f32x16 = a.as_f32x16();
        let r: u32x16 = vcvttps2udq(a, u32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
15646 }
15647}
15648
15649/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15650/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15651///
15652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu32&expand=1923)
15653#[inline]
15654#[target_feature(enable = "avx512f")]
15655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15656#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15657#[rustc_legacy_const_generics(3)]
15658pub fn _mm512_mask_cvtt_roundps_epu32<const SAE: i32>(
15659 src: __m512i,
15660 k: __mmask16,
15661 a: __m512,
15662) -> __m512i {
15663 unsafe {
15664 static_assert_sae!(SAE);
15665 let a: f32x16 = a.as_f32x16();
15666 let src: u32x16 = src.as_u32x16();
        let r: u32x16 = vcvttps2udq(a, src, k, SAE);
        transmute(r)
15669 }
15670}
15671
15672/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15673/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15674///
15675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu32&expand=1924)
15676#[inline]
15677#[target_feature(enable = "avx512f")]
15678#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15679#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15680#[rustc_legacy_const_generics(2)]
15681pub fn _mm512_maskz_cvtt_roundps_epu32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
15682 unsafe {
15683 static_assert_sae!(SAE);
15684 let a: f32x16 = a.as_f32x16();
        let r: u32x16 = vcvttps2udq(a, u32x16::ZERO, k, SAE);
        transmute(r)
15687 }
15688}
15689
15690/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15691/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15692///
15693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi32&expand=1904)
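///
/// A minimal usage sketch (illustrative, not from Intel's documentation); the 256-bit result
/// is checked with an `avx512vl` compare, so both features are detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///         unsafe {
///             let a = _mm512_set1_pd(2.99);
///             let r = _mm512_cvtt_roundpd_epi32::<{ _MM_FROUND_NO_EXC }>(a);
///             // Eight doubles truncate to eight 32-bit integers of value 2.
///             assert_eq!(_mm256_cmpeq_epi32_mask(r, _mm256_set1_epi32(2)), 0xff);
///         }
///     }
/// }
/// ```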
15694#[inline]
15695#[target_feature(enable = "avx512f")]
15696#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15697#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15698#[rustc_legacy_const_generics(1)]
15699pub fn _mm512_cvtt_roundpd_epi32<const SAE: i32>(a: __m512d) -> __m256i {
15700 unsafe {
15701 static_assert_sae!(SAE);
15702 let a: f64x8 = a.as_f64x8();
        let r: i32x8 = vcvttpd2dq(a, i32x8::ZERO, 0b11111111, SAE);
        transmute(r)
15705 }
15706}
15707
15708/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15709/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15710///
15711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi32&expand=1905)
15712#[inline]
15713#[target_feature(enable = "avx512f")]
15714#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15715#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15716#[rustc_legacy_const_generics(3)]
15717pub fn _mm512_mask_cvtt_roundpd_epi32<const SAE: i32>(
15718 src: __m256i,
15719 k: __mmask8,
15720 a: __m512d,
15721) -> __m256i {
15722 unsafe {
15723 static_assert_sae!(SAE);
15724 let a: f64x8 = a.as_f64x8();
15725 let src: i32x8 = src.as_i32x8();
        let r: i32x8 = vcvttpd2dq(a, src, k, SAE);
        transmute(r)
15728 }
15729}
15730
/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15732/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15733///
15734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi32&expand=1918)
15735#[inline]
15736#[target_feature(enable = "avx512f")]
15737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15738#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15739#[rustc_legacy_const_generics(2)]
15740pub fn _mm512_maskz_cvtt_roundpd_epi32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
15741 unsafe {
15742 static_assert_sae!(SAE);
15743 let a: f64x8 = a.as_f64x8();
        let r: i32x8 = vcvttpd2dq(a, i32x8::ZERO, k, SAE);
        transmute(r)
15746 }
15747}
15748
15749/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
15750/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15751///
15752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu32&expand=1910)
15753#[inline]
15754#[target_feature(enable = "avx512f")]
15755#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15756#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15757#[rustc_legacy_const_generics(1)]
15758pub fn _mm512_cvtt_roundpd_epu32<const SAE: i32>(a: __m512d) -> __m256i {
15759 unsafe {
15760 static_assert_sae!(SAE);
15761 let a: f64x8 = a.as_f64x8();
        let r: u32x8 = vcvttpd2udq(a, i32x8::ZERO, 0b11111111, SAE);
        transmute(r)
15764 }
15765}
15766
15767/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15768/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15769///
15770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu32&expand=1911)
15771#[inline]
15772#[target_feature(enable = "avx512f")]
15773#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15774#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15775#[rustc_legacy_const_generics(3)]
15776pub fn _mm512_mask_cvtt_roundpd_epu32<const SAE: i32>(
15777 src: __m256i,
15778 k: __mmask8,
15779 a: __m512d,
15780) -> __m256i {
15781 unsafe {
15782 static_assert_sae!(SAE);
15783 let a: f64x8 = a.as_f64x8();
15784 let src: i32x8 = src.as_i32x8();
        let r: u32x8 = vcvttpd2udq(a, src, k, SAE);
        transmute(r)
15787 }
15788}
15789
15790/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
15791///
15792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi32&expand=1984)
15793#[inline]
15794#[target_feature(enable = "avx512f")]
15795#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15796#[cfg_attr(test, assert_instr(vcvttps2dq))]
15797pub fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
15798 unsafe {
        transmute(vcvttps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
15805 }
15806}
15807
15808/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15809///
15810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi32&expand=1985)
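///
/// A minimal usage sketch (illustrative, not from Intel's documentation) showing the writemask
/// behaviour, assuming runtime `avx512f` support:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_set1_ps(7.9);
///             let src = _mm512_set1_epi32(-1);
///             let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
///             // The low eight lanes hold the truncated value 7, the high eight keep `src`.
///             assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(7)), 0b00000000_11111111);
///             assert_eq!(_mm512_cmpeq_epi32_mask(r, src), 0b11111111_00000000);
///         }
///     }
/// }
/// ```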
15811#[inline]
15812#[target_feature(enable = "avx512f")]
15813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15814#[cfg_attr(test, assert_instr(vcvttps2dq))]
15815pub fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
15816 unsafe {
        transmute(vcvttps2dq(
            a.as_f32x16(),
            src.as_i32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
15823 }
15824}
15825
15826/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15827///
15828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi32&expand=1986)
15829#[inline]
15830#[target_feature(enable = "avx512f")]
15831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15832#[cfg_attr(test, assert_instr(vcvttps2dq))]
15833pub fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
15834 unsafe {
        transmute(vcvttps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
15841 }
15842}
15843
15844/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15845///
15846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi32&expand=1982)
15847#[inline]
15848#[target_feature(enable = "avx512f,avx512vl")]
15849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15850#[cfg_attr(test, assert_instr(vcvttps2dq))]
15851pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), k)) }
15853}
15854
15855/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15856///
15857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi32&expand=1983)
15858#[inline]
15859#[target_feature(enable = "avx512f,avx512vl")]
15860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15861#[cfg_attr(test, assert_instr(vcvttps2dq))]
15862pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), i32x8::ZERO, k)) }
15864}
15865
15866/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15867///
15868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi32&expand=1979)
15869#[inline]
15870#[target_feature(enable = "avx512f,avx512vl")]
15871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15872#[cfg_attr(test, assert_instr(vcvttps2dq))]
15873pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), k)) }
15875}
15876
15877/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15878///
15879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi32&expand=1980)
15880#[inline]
15881#[target_feature(enable = "avx512f,avx512vl")]
15882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15883#[cfg_attr(test, assert_instr(vcvttps2dq))]
15884pub fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), i32x4::ZERO, k)) }
15886}
15887
15888/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15889///
15890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu32&expand=2002)
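///
/// A minimal usage sketch (illustrative, not from Intel's documentation); the input exceeds
/// `i32::MAX`, which is exactly what the unsigned conversion is for. Runtime `avx512f` support
/// is assumed:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             // 3_000_000_000.0 is exactly representable as an f32 and larger than i32::MAX.
///             let a = _mm512_set1_ps(3_000_000_000.0);
///             let r = _mm512_cvttps_epu32(a);
///             let e = _mm512_set1_epi32(3_000_000_000u32 as i32);
///             assert_eq!(_mm512_cmpeq_epu32_mask(r, e), 0xffff);
///         }
///     }
/// }
/// ```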
15891#[inline]
15892#[target_feature(enable = "avx512f")]
15893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15894#[cfg_attr(test, assert_instr(vcvttps2udq))]
15895pub fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
15896 unsafe {
        transmute(vcvttps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
15903 }
15904}
15905
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15907///
15908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu32&expand=2003)
15909#[inline]
15910#[target_feature(enable = "avx512f")]
15911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15912#[cfg_attr(test, assert_instr(vcvttps2udq))]
15913pub fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
15914 unsafe {
        transmute(vcvttps2udq(
            a.as_f32x16(),
            src.as_u32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
15921 }
15922}
15923
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15925///
15926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu32&expand=2004)
15927#[inline]
15928#[target_feature(enable = "avx512f")]
15929#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15930#[cfg_attr(test, assert_instr(vcvttps2udq))]
15931pub fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
15932 unsafe {
        transmute(vcvttps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
15939 }
15940}
15941
15942/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15943///
15944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu32&expand=1999)
15945#[inline]
15946#[target_feature(enable = "avx512f,avx512vl")]
15947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15948#[cfg_attr(test, assert_instr(vcvttps2udq))]
15949pub fn _mm256_cvttps_epu32(a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
15951}
15952
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15954///
15955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu32&expand=2000)
15956#[inline]
15957#[target_feature(enable = "avx512f,avx512vl")]
15958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15959#[cfg_attr(test, assert_instr(vcvttps2udq))]
15960pub fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
15962}
15963
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15965///
15966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu32&expand=2001)
15967#[inline]
15968#[target_feature(enable = "avx512f,avx512vl")]
15969#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15970#[cfg_attr(test, assert_instr(vcvttps2udq))]
15971pub fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
15973}
15974
15975/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15976///
15977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu32&expand=1996)
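///
/// A minimal usage sketch (illustrative, not from Intel's documentation), assuming runtime
/// `avx512f` and `avx512vl` support:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///         unsafe {
///             let a = _mm_set1_ps(3.7);
///             let r = _mm_cvttps_epu32(a);
///             // 3.7 truncates to 3 in all four lanes.
///             assert_eq!(_mm_cmpeq_epi32_mask(r, _mm_set1_epi32(3)), 0b1111);
///         }
///     }
/// }
/// ```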
15978#[inline]
15979#[target_feature(enable = "avx512f,avx512vl")]
15980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15981#[cfg_attr(test, assert_instr(vcvttps2udq))]
15982pub fn _mm_cvttps_epu32(a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
15984}
15985
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15987///
15988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu32&expand=1997)
15989#[inline]
15990#[target_feature(enable = "avx512f,avx512vl")]
15991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15992#[cfg_attr(test, assert_instr(vcvttps2udq))]
15993pub fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
15995}
15996
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15998///
15999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu32&expand=1998)
16000#[inline]
16001#[target_feature(enable = "avx512f,avx512vl")]
16002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16003#[cfg_attr(test, assert_instr(vcvttps2udq))]
16004pub fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
16006}
16007
16008/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
16009/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16010///
16011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu32&expand=1912)
16012#[inline]
16013#[target_feature(enable = "avx512f")]
16014#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16015#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
16016#[rustc_legacy_const_generics(2)]
16017pub fn _mm512_maskz_cvtt_roundpd_epu32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
16018 unsafe {
16019 static_assert_sae!(SAE);
16020 let a: f64x8 = a.as_f64x8();
        let r: u32x8 = vcvttpd2udq(a, i32x8::ZERO, k, SAE);
        transmute(r)
16023 }
16024}
16025
16026/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
16027///
16028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi32&expand=1947)
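///
/// A minimal usage sketch (an illustrative addition; `demo` is a hypothetical
/// wrapper and an AVX-512F target is assumed):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     use core::arch::x86_64::*;
///     let a = _mm512_set1_pd(-2.9);
///     // truncation rounds toward zero, so every 32-bit lane holds -2
///     let _r = _mm512_cvttpd_epi32(a);
/// }
/// ```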
16029#[inline]
16030#[target_feature(enable = "avx512f")]
16031#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16032#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16033pub fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
16034 unsafe {
        transmute(vcvttpd2dq(
            a.as_f64x8(),
            i32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
16041 }
16042}
16043
16044/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16045///
16046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi32&expand=1948)
16047#[inline]
16048#[target_feature(enable = "avx512f")]
16049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16050#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16051pub fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16052 unsafe {
        transmute(vcvttpd2dq(
            a.as_f64x8(),
            src.as_i32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
16059 }
16060}
16061
16062/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16063///
16064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi32&expand=1949)
16065#[inline]
16066#[target_feature(enable = "avx512f")]
16067#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16068#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16069pub fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
16070 unsafe {
        transmute(vcvttpd2dq(
            a.as_f64x8(),
            i32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
16077 }
16078}
16079
16080/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16081///
16082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi32&expand=1945)
16083#[inline]
16084#[target_feature(enable = "avx512f,avx512vl")]
16085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16086#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16087pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k)) }
16089}
16090
16091/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16092///
16093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi32&expand=1946)
16094#[inline]
16095#[target_feature(enable = "avx512f,avx512vl")]
16096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16097#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16098pub fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), i32x4::ZERO, k)) }
16100}
16101
16102/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16103///
16104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi32&expand=1942)
16105#[inline]
16106#[target_feature(enable = "avx512f,avx512vl")]
16107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16108#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16109pub fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), k)) }
16111}
16112
16113/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16114///
16115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi32&expand=1943)
16116#[inline]
16117#[target_feature(enable = "avx512f,avx512vl")]
16118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16119#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16120pub fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), i32x4::ZERO, k)) }
16122}
16123
16124/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16125///
16126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu32&expand=1965)
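///
/// A minimal usage sketch (an illustrative addition; `demo` is a hypothetical
/// wrapper and an AVX-512F target is assumed):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     use core::arch::x86_64::*;
///     let a = _mm512_set1_pd(7.9);
///     // every unsigned 32-bit lane of the result holds 7
///     let _r = _mm512_cvttpd_epu32(a);
/// }
/// ```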
16127#[inline]
16128#[target_feature(enable = "avx512f")]
16129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16130#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16131pub fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
16132 unsafe {
        transmute(vcvttpd2udq(
            a.as_f64x8(),
            i32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
16139 }
16140}
16141
16142/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16143///
16144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu32&expand=1966)
16145#[inline]
16146#[target_feature(enable = "avx512f")]
16147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16148#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16149pub fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16150 unsafe {
        transmute(vcvttpd2udq(
            a.as_f64x8(),
            src.as_i32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
16157 }
16158}
16159
16160/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16161///
16162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu32&expand=1967)
16163#[inline]
16164#[target_feature(enable = "avx512f")]
16165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16166#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16167pub fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
16168 unsafe {
        transmute(vcvttpd2udq(
            a.as_f64x8(),
            i32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
16175 }
16176}
16177
16178/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16179///
16180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu32&expand=1962)
16181#[inline]
16182#[target_feature(enable = "avx512f,avx512vl")]
16183#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16184#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16185pub fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, 0b11111111)) }
16187}
16188
16189/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16190///
16191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu32&expand=1963)
16192#[inline]
16193#[target_feature(enable = "avx512f,avx512vl")]
16194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16195#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16196pub fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), k)) }
16198}
16199
16200/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16201///
16202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu32&expand=1964)
16203#[inline]
16204#[target_feature(enable = "avx512f,avx512vl")]
16205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16206#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16207pub fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, k)) }
16209}
16210
16211/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16212///
16213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu32&expand=1959)
16214#[inline]
16215#[target_feature(enable = "avx512f,avx512vl")]
16216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16217#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16218pub fn _mm_cvttpd_epu32(a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, 0b11111111)) }
16220}
16221
16222/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16223///
16224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu32&expand=1960)
16225#[inline]
16226#[target_feature(enable = "avx512f,avx512vl")]
16227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16228#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16229pub fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), k)) }
16231}
16232
16233/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16234///
16235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu32&expand=1961)
16236#[inline]
16237#[target_feature(enable = "avx512f,avx512vl")]
16238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16239#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16240pub fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, k)) }
16242}
16243
16244/// Returns vector of type `__m512d` with all elements set to zero.
16245///
16246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_pd&expand=5018)
16247#[inline]
16248#[target_feature(enable = "avx512f")]
16249#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16250#[cfg_attr(test, assert_instr(vxorps))]
16251pub fn _mm512_setzero_pd() -> __m512d {
16252 // All-0 is a properly initialized __m512d
16253 unsafe { const { mem::zeroed() } }
16254}
16255
16256/// Returns vector of type `__m512` with all elements set to zero.
16257///
16258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_ps&expand=5021)
16259#[inline]
16260#[target_feature(enable = "avx512f")]
16261#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16262#[cfg_attr(test, assert_instr(vxorps))]
16263pub fn _mm512_setzero_ps() -> __m512 {
16264 // All-0 is a properly initialized __m512
16265 unsafe { const { mem::zeroed() } }
16266}
16267
16268/// Return vector of type `__m512` with all elements set to zero.
16269///
16270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero&expand=5014)
16271#[inline]
16272#[target_feature(enable = "avx512f")]
16273#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16274#[cfg_attr(test, assert_instr(vxorps))]
16275pub fn _mm512_setzero() -> __m512 {
16276 // All-0 is a properly initialized __m512
16277 unsafe { const { mem::zeroed() } }
16278}
16279
16280/// Returns vector of type `__m512i` with all elements set to zero.
16281///
16282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_si512&expand=5024)
16283#[inline]
16284#[target_feature(enable = "avx512f")]
16285#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16286#[cfg_attr(test, assert_instr(vxorps))]
16287pub fn _mm512_setzero_si512() -> __m512i {
16288 // All-0 is a properly initialized __m512i
16289 unsafe { const { mem::zeroed() } }
16290}
16291
16292/// Return vector of type `__m512i` with all elements set to zero.
16293///
16294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_epi32&expand=5015)
16295#[inline]
16296#[target_feature(enable = "avx512f")]
16297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16298#[cfg_attr(test, assert_instr(vxorps))]
16299pub fn _mm512_setzero_epi32() -> __m512i {
16300 // All-0 is a properly initialized __m512i
16301 unsafe { const { mem::zeroed() } }
16302}
16303
16304/// Sets packed 32-bit integers in `dst` with the supplied values in reverse
16305/// order.
16306///
16307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi32&expand=4991)
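///
/// A minimal usage sketch (an illustrative addition; `demo` is a hypothetical
/// wrapper and an AVX-512F target is assumed):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     use core::arch::x86_64::*;
///     // the first argument lands in the lowest lane: lane 0 is 0, lane 15 is 15
///     let _v = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// }
/// ```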
16308#[inline]
16309#[target_feature(enable = "avx512f")]
16310#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16311pub fn _mm512_setr_epi32(
16312 e15: i32,
16313 e14: i32,
16314 e13: i32,
16315 e12: i32,
16316 e11: i32,
16317 e10: i32,
16318 e9: i32,
16319 e8: i32,
16320 e7: i32,
16321 e6: i32,
16322 e5: i32,
16323 e4: i32,
16324 e3: i32,
16325 e2: i32,
16326 e1: i32,
16327 e0: i32,
16328) -> __m512i {
16329 unsafe {
        let r: i32x16 = i32x16::new(
            e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
        );
        transmute(r)
16334 }
16335}
16336
16337/// Set packed 8-bit integers in dst with the supplied values.
16338///
16339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi8&expand=4915)
16340#[inline]
16341#[target_feature(enable = "avx512f")]
16342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16343pub fn _mm512_set_epi8(
16344 e63: i8,
16345 e62: i8,
16346 e61: i8,
16347 e60: i8,
16348 e59: i8,
16349 e58: i8,
16350 e57: i8,
16351 e56: i8,
16352 e55: i8,
16353 e54: i8,
16354 e53: i8,
16355 e52: i8,
16356 e51: i8,
16357 e50: i8,
16358 e49: i8,
16359 e48: i8,
16360 e47: i8,
16361 e46: i8,
16362 e45: i8,
16363 e44: i8,
16364 e43: i8,
16365 e42: i8,
16366 e41: i8,
16367 e40: i8,
16368 e39: i8,
16369 e38: i8,
16370 e37: i8,
16371 e36: i8,
16372 e35: i8,
16373 e34: i8,
16374 e33: i8,
16375 e32: i8,
16376 e31: i8,
16377 e30: i8,
16378 e29: i8,
16379 e28: i8,
16380 e27: i8,
16381 e26: i8,
16382 e25: i8,
16383 e24: i8,
16384 e23: i8,
16385 e22: i8,
16386 e21: i8,
16387 e20: i8,
16388 e19: i8,
16389 e18: i8,
16390 e17: i8,
16391 e16: i8,
16392 e15: i8,
16393 e14: i8,
16394 e13: i8,
16395 e12: i8,
16396 e11: i8,
16397 e10: i8,
16398 e9: i8,
16399 e8: i8,
16400 e7: i8,
16401 e6: i8,
16402 e5: i8,
16403 e4: i8,
16404 e3: i8,
16405 e2: i8,
16406 e1: i8,
16407 e0: i8,
16408) -> __m512i {
16409 unsafe {
        let r: i8x64 = i8x64::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35,
            e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52,
            e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63,
        );
        transmute(r)
16417 }
16418}
16419
16420/// Set packed 16-bit integers in dst with the supplied values.
16421///
16422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi16&expand=4905)
16423#[inline]
16424#[target_feature(enable = "avx512f")]
16425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16426pub fn _mm512_set_epi16(
16427 e31: i16,
16428 e30: i16,
16429 e29: i16,
16430 e28: i16,
16431 e27: i16,
16432 e26: i16,
16433 e25: i16,
16434 e24: i16,
16435 e23: i16,
16436 e22: i16,
16437 e21: i16,
16438 e20: i16,
16439 e19: i16,
16440 e18: i16,
16441 e17: i16,
16442 e16: i16,
16443 e15: i16,
16444 e14: i16,
16445 e13: i16,
16446 e12: i16,
16447 e11: i16,
16448 e10: i16,
16449 e9: i16,
16450 e8: i16,
16451 e7: i16,
16452 e6: i16,
16453 e5: i16,
16454 e4: i16,
16455 e3: i16,
16456 e2: i16,
16457 e1: i16,
16458 e0: i16,
16459) -> __m512i {
16460 unsafe {
        let r: i16x32 = i16x32::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
        );
        transmute(r)
16466 }
16467}
16468
16469/// Set packed 32-bit integers in dst with the repeated 4 element sequence.
16470///
16471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi32&expand=4982)
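///
/// A minimal usage sketch (an illustrative addition; `demo` is a hypothetical
/// wrapper and an AVX-512F target is assumed):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     use core::arch::x86_64::*;
///     // lanes from lowest to highest repeat the pattern 1, 2, 3, 4
///     let _v = _mm512_set4_epi32(4, 3, 2, 1);
/// }
/// ```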
16472#[inline]
16473#[target_feature(enable = "avx512f")]
16474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16475pub fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
    _mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
16477}
16478
16479/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence.
16480///
16481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_ps&expand=4985)
16482#[inline]
16483#[target_feature(enable = "avx512f")]
16484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16485pub fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
    _mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
16487}
16488
16489/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence.
16490///
16491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_pd&expand=4984)
16492#[inline]
16493#[target_feature(enable = "avx512f")]
16494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16495pub fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
    _mm512_set_pd(d, c, b, a, d, c, b, a)
16497}
16498
16499/// Set packed 32-bit integers in dst with the repeated 4 element sequence in reverse order.
16500///
16501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi32&expand=5009)
16502#[inline]
16503#[target_feature(enable = "avx512f")]
16504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16505pub fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
    _mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
16507}
16508
16509/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16510///
16511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_ps&expand=5012)
16512#[inline]
16513#[target_feature(enable = "avx512f")]
16514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16515pub fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
    _mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
16517}
16518
16519/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16520///
16521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_pd&expand=5011)
16522#[inline]
16523#[target_feature(enable = "avx512f")]
16524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16525pub fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
    _mm512_set_pd(a, b, c, d, a, b, c, d)
16527}
16528
16529/// Set packed 64-bit integers in dst with the supplied values.
16530///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi64&expand=4910)
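///
/// A minimal usage sketch (an illustrative addition; `demo` is a hypothetical
/// wrapper and an AVX-512F target is assumed):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     use core::arch::x86_64::*;
///     // the last argument fills the lowest lane: lane 0 holds 0 and lane 7 holds 7
///     let _v = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
/// }
/// ```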
16532#[inline]
16533#[target_feature(enable = "avx512f")]
16534#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16535pub fn _mm512_set_epi64(
16536 e0: i64,
16537 e1: i64,
16538 e2: i64,
16539 e3: i64,
16540 e4: i64,
16541 e5: i64,
16542 e6: i64,
16543 e7: i64,
16544) -> __m512i {
    _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
16546}
16547
16548/// Set packed 64-bit integers in dst with the supplied values in reverse order.
16549///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi64&expand=4993)
16551#[inline]
16552#[target_feature(enable = "avx512f")]
16553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16554pub fn _mm512_setr_epi64(
16555 e0: i64,
16556 e1: i64,
16557 e2: i64,
16558 e3: i64,
16559 e4: i64,
16560 e5: i64,
16561 e6: i64,
16562 e7: i64,
16563) -> __m512i {
16564 unsafe {
        let r: i64x8 = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
        transmute(r)
16567 }
16568}
16569
16570/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16571///
16572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_pd&expand=3002)
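///
/// A minimal usage sketch (an illustrative addition; `demo` and `table` are
/// hypothetical and an AVX-512F target is assumed):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo(table: &[f64; 16]) {
///     use core::arch::x86_64::*;
///     // indices are element offsets; SCALE = 8 because each f64 is 8 bytes wide
///     let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
///     // gathers table[0], table[2], ..., table[14]
///     let _r = unsafe { _mm512_i32gather_pd::<8>(idx, table.as_ptr()) };
/// }
/// ```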
16573#[inline]
16574#[target_feature(enable = "avx512f")]
16575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16576#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16577#[rustc_legacy_const_generics(2)]
16578pub unsafe fn _mm512_i32gather_pd<const SCALE: i32>(
16579 offsets: __m256i,
16580 slice: *const f64,
16581) -> __m512d {
16582 static_assert_imm8_scale!(SCALE);
16583 let zero: f64x8 = f64x8::ZERO;
16584 let neg_one: i8 = -1;
16585 let slice: *const i8 = slice as *const i8;
16586 let offsets: i32x8 = offsets.as_i32x8();
    let r: f64x8 = vgatherdpd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16589}
16590
16591/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16592///
16593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_pd&expand=3003)
16594#[inline]
16595#[target_feature(enable = "avx512f")]
16596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16597#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16598#[rustc_legacy_const_generics(4)]
16599pub unsafe fn _mm512_mask_i32gather_pd<const SCALE: i32>(
16600 src: __m512d,
16601 mask: __mmask8,
16602 offsets: __m256i,
16603 slice: *const f64,
16604) -> __m512d {
16605 static_assert_imm8_scale!(SCALE);
16606 let src: f64x8 = src.as_f64x8();
16607 let slice: *const i8 = slice as *const i8;
16608 let offsets: i32x8 = offsets.as_i32x8();
16609 let r: f64x8 = vgatherdpd(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
16611}
16612
16613/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16614///
16615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_pd&expand=3092)
16616#[inline]
16617#[target_feature(enable = "avx512f")]
16618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16619#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16620#[rustc_legacy_const_generics(2)]
16621pub unsafe fn _mm512_i64gather_pd<const SCALE: i32>(
16622 offsets: __m512i,
16623 slice: *const f64,
16624) -> __m512d {
16625 static_assert_imm8_scale!(SCALE);
16626 let zero: f64x8 = f64x8::ZERO;
16627 let neg_one: i8 = -1;
16628 let slice: *const i8 = slice as *const i8;
16629 let offsets: i64x8 = offsets.as_i64x8();
    let r: f64x8 = vgatherqpd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16632}
16633
16634/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16635///
16636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_pd&expand=3093)
16637#[inline]
16638#[target_feature(enable = "avx512f")]
16639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16640#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16641#[rustc_legacy_const_generics(4)]
16642pub unsafe fn _mm512_mask_i64gather_pd<const SCALE: i32>(
16643 src: __m512d,
16644 mask: __mmask8,
16645 offsets: __m512i,
16646 slice: *const f64,
16647) -> __m512d {
16648 static_assert_imm8_scale!(SCALE);
16649 let src: f64x8 = src.as_f64x8();
16650 let slice: *const i8 = slice as *const i8;
16651 let offsets: i64x8 = offsets.as_i64x8();
16652 let r: f64x8 = vgatherqpd(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
16654}
16655
16656/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16657///
16658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_ps&expand=3100)
16659#[inline]
16660#[target_feature(enable = "avx512f")]
16661#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16662#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16663#[rustc_legacy_const_generics(2)]
16664pub unsafe fn _mm512_i64gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m256 {
16665 static_assert_imm8_scale!(SCALE);
16666 let zero: f32x8 = f32x8::ZERO;
16667 let neg_one: i8 = -1;
16668 let slice: *const i8 = slice as *const i8;
16669 let offsets: i64x8 = offsets.as_i64x8();
    let r: f32x8 = vgatherqps(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16672}
16673
16674/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16675///
16676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_ps&expand=3101)
16677#[inline]
16678#[target_feature(enable = "avx512f")]
16679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16680#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16681#[rustc_legacy_const_generics(4)]
16682pub unsafe fn _mm512_mask_i64gather_ps<const SCALE: i32>(
16683 src: __m256,
16684 mask: __mmask8,
16685 offsets: __m512i,
16686 slice: *const f32,
16687) -> __m256 {
16688 static_assert_imm8_scale!(SCALE);
16689 let src: f32x8 = src.as_f32x8();
16690 let slice: *const i8 = slice as *const i8;
16691 let offsets: i64x8 = offsets.as_i64x8();
16692 let r: f32x8 = vgatherqps(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
16694}
16695
16696/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16697///
16698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_ps&expand=3010)
16699#[inline]
16700#[target_feature(enable = "avx512f")]
16701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16702#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16703#[rustc_legacy_const_generics(2)]
16704pub unsafe fn _mm512_i32gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m512 {
16705 static_assert_imm8_scale!(SCALE);
16706 let zero: f32x16 = f32x16::ZERO;
16707 let neg_one: i16 = -1;
16708 let slice: *const i8 = slice as *const i8;
16709 let offsets: i32x16 = offsets.as_i32x16();
    let r: f32x16 = vgatherdps(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16712}
16713
16714/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16715///
16716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_ps&expand=3011)
16717#[inline]
16718#[target_feature(enable = "avx512f")]
16719#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16720#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16721#[rustc_legacy_const_generics(4)]
16722pub unsafe fn _mm512_mask_i32gather_ps<const SCALE: i32>(
16723 src: __m512,
16724 mask: __mmask16,
16725 offsets: __m512i,
16726 slice: *const f32,
16727) -> __m512 {
16728 static_assert_imm8_scale!(SCALE);
16729 let src: f32x16 = src.as_f32x16();
16730 let slice: *const i8 = slice as *const i8;
16731 let offsets: i32x16 = offsets.as_i32x16();
16732 let r: f32x16 = vgatherdps(src, slice, offsets, mask as i16, SCALE);
    transmute(r)
16734}
16735
16736/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16737///
16738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi32&expand=2986)
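///
/// A minimal usage sketch (an illustrative addition; `demo` and `table` are
/// hypothetical and an AVX-512F target is assumed):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo(table: &[i32; 16]) {
///     use core::arch::x86_64::*;
///     let idx = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     // loads table[0] through table[15]; SCALE = 4 because each i32 is 4 bytes wide
///     let _r = unsafe { _mm512_i32gather_epi32::<4>(idx, table.as_ptr()) };
/// }
/// ```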
16739#[inline]
16740#[target_feature(enable = "avx512f")]
16741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16742#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16743#[rustc_legacy_const_generics(2)]
16744pub unsafe fn _mm512_i32gather_epi32<const SCALE: i32>(
16745 offsets: __m512i,
16746 slice: *const i32,
16747) -> __m512i {
16748 static_assert_imm8_scale!(SCALE);
16749 let zero: i32x16 = i32x16::ZERO;
16750 let neg_one: i16 = -1;
16751 let slice: *const i8 = slice as *const i8;
16752 let offsets: i32x16 = offsets.as_i32x16();
    let r: i32x16 = vpgatherdd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16755}
16756
16757/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16758///
16759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi32&expand=2987)
16760#[inline]
16761#[target_feature(enable = "avx512f")]
16762#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16763#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16764#[rustc_legacy_const_generics(4)]
16765pub unsafe fn _mm512_mask_i32gather_epi32<const SCALE: i32>(
16766 src: __m512i,
16767 mask: __mmask16,
16768 offsets: __m512i,
16769 slice: *const i32,
16770) -> __m512i {
16771 static_assert_imm8_scale!(SCALE);
16772 let src: i32x16 = src.as_i32x16();
16773 let mask: i16 = mask as i16;
16774 let slice: *const i8 = slice as *const i8;
16775 let offsets: i32x16 = offsets.as_i32x16();
16776 let r: i32x16 = vpgatherdd(src, slice, offsets, mask, SCALE);
    transmute(r)
16778}
16779
16780/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16781///
16782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi64&expand=2994)
16783#[inline]
16784#[target_feature(enable = "avx512f")]
16785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16786#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16787#[rustc_legacy_const_generics(2)]
16788pub unsafe fn _mm512_i32gather_epi64<const SCALE: i32>(
16789 offsets: __m256i,
16790 slice: *const i64,
16791) -> __m512i {
16792 static_assert_imm8_scale!(SCALE);
16793 let zero: i64x8 = i64x8::ZERO;
16794 let neg_one: i8 = -1;
16795 let slice: *const i8 = slice as *const i8;
16796 let offsets: i32x8 = offsets.as_i32x8();
    let r: i64x8 = vpgatherdq(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16799}
16800
16801/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16802///
16803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi64&expand=2995)
16804#[inline]
16805#[target_feature(enable = "avx512f")]
16806#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16807#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16808#[rustc_legacy_const_generics(4)]
16809pub unsafe fn _mm512_mask_i32gather_epi64<const SCALE: i32>(
16810 src: __m512i,
16811 mask: __mmask8,
16812 offsets: __m256i,
16813 slice: *const i64,
16814) -> __m512i {
16815 static_assert_imm8_scale!(SCALE);
16816 let src: i64x8 = src.as_i64x8();
16817 let mask: i8 = mask as i8;
16818 let slice: *const i8 = slice as *const i8;
16819 let offsets: i32x8 = offsets.as_i32x8();
16820 let r: i64x8 = vpgatherdq(src, slice, offsets, mask, SCALE);
    transmute(r)
16822}
16823
16824/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16825///
16826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi64&expand=3084)
16827#[inline]
16828#[target_feature(enable = "avx512f")]
16829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16830#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16831#[rustc_legacy_const_generics(2)]
16832pub unsafe fn _mm512_i64gather_epi64<const SCALE: i32>(
16833 offsets: __m512i,
16834 slice: *const i64,
16835) -> __m512i {
16836 static_assert_imm8_scale!(SCALE);
16837 let zero: i64x8 = i64x8::ZERO;
16838 let neg_one: i8 = -1;
16839 let slice: *const i8 = slice as *const i8;
16840 let offsets: i64x8 = offsets.as_i64x8();
    let r: i64x8 = vpgatherqq(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16843}
16844
16845/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16846///
16847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi64&expand=3085)
16848#[inline]
16849#[target_feature(enable = "avx512f")]
16850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16851#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16852#[rustc_legacy_const_generics(4)]
16853pub unsafe fn _mm512_mask_i64gather_epi64<const SCALE: i32>(
16854 src: __m512i,
16855 mask: __mmask8,
16856 offsets: __m512i,
16857 slice: *const i64,
16858) -> __m512i {
16859 static_assert_imm8_scale!(SCALE);
16860 let src: i64x8 = src.as_i64x8();
16861 let mask: i8 = mask as i8;
16862 let slice: *const i8 = slice as *const i8;
16863 let offsets: i64x8 = offsets.as_i64x8();
16864 let r: i64x8 = vpgatherqq(src, slice, offsets, mask, SCALE);
    transmute(r)
16866}
16867
16868/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16869///
16870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi32&expand=3074)
16871#[inline]
16872#[target_feature(enable = "avx512f")]
16873#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16874#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16875#[rustc_legacy_const_generics(2)]
16876pub unsafe fn _mm512_i64gather_epi32<const SCALE: i32>(
16877 offsets: __m512i,
16878 slice: *const i32,
16879) -> __m256i {
16880 static_assert_imm8_scale!(SCALE);
16881 let zeros: i32x8 = i32x8::ZERO;
16882 let neg_one: i8 = -1;
16883 let slice: *const i8 = slice as *const i8;
16884 let offsets: i64x8 = offsets.as_i64x8();
    let r: i32x8 = vpgatherqd(zeros, slice, offsets, neg_one, SCALE);
    transmute(r)
16887}
16888
16889/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16890///
16891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi32&expand=3075)
16892#[inline]
16893#[target_feature(enable = "avx512f")]
16894#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16895#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16896#[rustc_legacy_const_generics(4)]
16897pub unsafe fn _mm512_mask_i64gather_epi32<const SCALE: i32>(
16898 src: __m256i,
16899 mask: __mmask8,
16900 offsets: __m512i,
16901 slice: *const i32,
16902) -> __m256i {
16903 static_assert_imm8_scale!(SCALE);
16904 let src: i32x8 = src.as_i32x8();
16905 let mask: i8 = mask as i8;
16906 let slice: *const i8 = slice as *const i8;
16907 let offsets: i64x8 = offsets.as_i64x8();
16908 let r: i32x8 = vpgatherqd(src, slice, offsets, mask, SCALE);
    transmute(r)
16910}
16911
16912/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16913///
16914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_pd&expand=3044)
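///
/// A minimal usage sketch (an illustrative addition; `demo` and `out` are
/// hypothetical and an AVX-512F target is assumed):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo(out: &mut [f64; 16]) {
///     use core::arch::x86_64::*;
///     let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
///     let src = _mm512_set1_pd(1.0);
///     // writes 1.0 to out[0], out[2], ..., out[14]; SCALE = 8 (each f64 is 8 bytes wide)
///     unsafe { _mm512_i32scatter_pd::<8>(out.as_mut_ptr(), idx, src) };
/// }
/// ```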
16915#[inline]
16916#[target_feature(enable = "avx512f")]
16917#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16918#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16919#[rustc_legacy_const_generics(3)]
16920pub unsafe fn _mm512_i32scatter_pd<const SCALE: i32>(
16921 slice: *mut f64,
16922 offsets: __m256i,
16923 src: __m512d,
16924) {
16925 static_assert_imm8_scale!(SCALE);
16926 let src: f64x8 = src.as_f64x8();
16927 let neg_one: i8 = -1;
16928 let slice: *mut i8 = slice as *mut i8;
16929 let offsets: i32x8 = offsets.as_i32x8();
    vscatterdpd(slice, neg_one, offsets, src, SCALE);
16931}
16932
16933/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16934///
16935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_pd&expand=3045)
16936#[inline]
16937#[target_feature(enable = "avx512f")]
16938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16939#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16940#[rustc_legacy_const_generics(4)]
16941pub unsafe fn _mm512_mask_i32scatter_pd<const SCALE: i32>(
16942 slice: *mut f64,
16943 mask: __mmask8,
16944 offsets: __m256i,
16945 src: __m512d,
16946) {
16947 static_assert_imm8_scale!(SCALE);
16948 let src: f64x8 = src.as_f64x8();
16949 let slice: *mut i8 = slice as *mut i8;
16950 let offsets: i32x8 = offsets.as_i32x8();
16951 vscatterdpd(slice, mask as i8, offsets, src, SCALE);
16952}
16953
16954/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16955///
16956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_pd&expand=3122)
16957#[inline]
16958#[target_feature(enable = "avx512f")]
16959#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16960#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16961#[rustc_legacy_const_generics(3)]
16962pub unsafe fn _mm512_i64scatter_pd<const SCALE: i32>(
16963 slice: *mut f64,
16964 offsets: __m512i,
16965 src: __m512d,
16966) {
16967 static_assert_imm8_scale!(SCALE);
16968 let src: f64x8 = src.as_f64x8();
16969 let neg_one: i8 = -1;
16970 let slice: *mut i8 = slice as *mut i8;
16971 let offsets: i64x8 = offsets.as_i64x8();
    vscatterqpd(slice, neg_one, offsets, src, SCALE);
16973}
16974
16975/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16976///
16977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_pd&expand=3123)
16978#[inline]
16979#[target_feature(enable = "avx512f")]
16980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16981#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16982#[rustc_legacy_const_generics(4)]
16983pub unsafe fn _mm512_mask_i64scatter_pd<const SCALE: i32>(
16984 slice: *mut f64,
16985 mask: __mmask8,
16986 offsets: __m512i,
16987 src: __m512d,
16988) {
16989 static_assert_imm8_scale!(SCALE);
16990 let src: f64x8 = src.as_f64x8();
16991 let slice: *mut i8 = slice as *mut i8;
16992 let offsets: i64x8 = offsets.as_i64x8();
16993 vscatterqpd(slice, mask as i8, offsets, src, SCALE);
16994}
16995
16996/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16997///
16998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_ps&expand=3050)
16999#[inline]
17000#[target_feature(enable = "avx512f")]
17001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17002#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17003#[rustc_legacy_const_generics(3)]
17004pub unsafe fn _mm512_i32scatter_ps<const SCALE: i32>(
17005 slice: *mut f32,
17006 offsets: __m512i,
17007 src: __m512,
17008) {
17009 static_assert_imm8_scale!(SCALE);
17010 let src: f32x16 = src.as_f32x16();
17011 let neg_one: i16 = -1;
17012 let slice: *mut i8 = slice as *mut i8;
17013 let offsets: i32x16 = offsets.as_i32x16();
    vscatterdps(slice, neg_one, offsets, src, SCALE);
17015}
17016
17017/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17018///
17019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_ps&expand=3051)
17020#[inline]
17021#[target_feature(enable = "avx512f")]
17022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17023#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17024#[rustc_legacy_const_generics(4)]
17025pub unsafe fn _mm512_mask_i32scatter_ps<const SCALE: i32>(
17026 slice: *mut f32,
17027 mask: __mmask16,
17028 offsets: __m512i,
17029 src: __m512,
17030) {
17031 static_assert_imm8_scale!(SCALE);
17032 let src: f32x16 = src.as_f32x16();
17033 let slice: *mut i8 = slice as *mut i8;
17034 let offsets: i32x16 = offsets.as_i32x16();
17035 vscatterdps(slice, mask as i16, offsets, src, SCALE);
17036}
17037
/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17039///
17040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_ps&expand=3128)
17041#[inline]
17042#[target_feature(enable = "avx512f")]
17043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17044#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17045#[rustc_legacy_const_generics(3)]
17046pub unsafe fn _mm512_i64scatter_ps<const SCALE: i32>(
17047 slice: *mut f32,
17048 offsets: __m512i,
17049 src: __m256,
17050) {
17051 static_assert_imm8_scale!(SCALE);
17052 let src: f32x8 = src.as_f32x8();
17053 let neg_one: i8 = -1;
17054 let slice: *mut i8 = slice as *mut i8;
17055 let offsets: i64x8 = offsets.as_i64x8();
    vscatterqps(slice, neg_one, offsets, src, SCALE);
17057}
17058
17059/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17060///
17061/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_ps&expand=3129)
17062#[inline]
17063#[target_feature(enable = "avx512f")]
17064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17065#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17066#[rustc_legacy_const_generics(4)]
17067pub unsafe fn _mm512_mask_i64scatter_ps<const SCALE: i32>(
17068 slice: *mut f32,
17069 mask: __mmask8,
17070 offsets: __m512i,
17071 src: __m256,
17072) {
17073 static_assert_imm8_scale!(SCALE);
17074 let src: f32x8 = src.as_f32x8();
17075 let slice: *mut i8 = slice as *mut i8;
17076 let offsets: i64x8 = offsets.as_i64x8();
17077 vscatterqps(slice, mask as i8, offsets, src, SCALE);
17078}
17079
17080/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17081///
17082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi64&expand=3038)
17083#[inline]
17084#[target_feature(enable = "avx512f")]
17085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17086#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17087#[rustc_legacy_const_generics(3)]
17088pub unsafe fn _mm512_i32scatter_epi64<const SCALE: i32>(
17089 slice: *mut i64,
17090 offsets: __m256i,
17091 src: __m512i,
17092) {
17093 static_assert_imm8_scale!(SCALE);
17094 let src: i64x8 = src.as_i64x8();
17095 let neg_one: i8 = -1;
17096 let slice: *mut i8 = slice as *mut i8;
17097 let offsets: i32x8 = offsets.as_i32x8();
    vpscatterdq(slice, neg_one, offsets, src, SCALE);
17099}
17100
17101/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17102///
17103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi64&expand=3039)
17104#[inline]
17105#[target_feature(enable = "avx512f")]
17106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17107#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17108#[rustc_legacy_const_generics(4)]
17109pub unsafe fn _mm512_mask_i32scatter_epi64<const SCALE: i32>(
17110 slice: *mut i64,
17111 mask: __mmask8,
17112 offsets: __m256i,
17113 src: __m512i,
17114) {
17115 static_assert_imm8_scale!(SCALE);
17116 let src: i64x8 = src.as_i64x8();
17117 let mask: i8 = mask as i8;
17118 let slice: *mut i8 = slice as *mut i8;
17119 let offsets: i32x8 = offsets.as_i32x8();
17120 vpscatterdq(slice, mask, offsets, src, SCALE);
17121}
17122
17123/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17124///
17125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi64&expand=3116)
17126#[inline]
17127#[target_feature(enable = "avx512f")]
17128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17129#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17130#[rustc_legacy_const_generics(3)]
17131pub unsafe fn _mm512_i64scatter_epi64<const SCALE: i32>(
17132 slice: *mut i64,
17133 offsets: __m512i,
17134 src: __m512i,
17135) {
17136 static_assert_imm8_scale!(SCALE);
17137 let src: i64x8 = src.as_i64x8();
17138 let neg_one: i8 = -1;
17139 let slice: *mut i8 = slice as *mut i8;
17140 let offsets: i64x8 = offsets.as_i64x8();
    vpscatterqq(slice, neg_one, offsets, src, SCALE);
17142}
17143
17144/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17145///
17146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi64&expand=3117)
17147#[inline]
17148#[target_feature(enable = "avx512f")]
17149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17150#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17151#[rustc_legacy_const_generics(4)]
17152pub unsafe fn _mm512_mask_i64scatter_epi64<const SCALE: i32>(
17153 slice: *mut i64,
17154 mask: __mmask8,
17155 offsets: __m512i,
17156 src: __m512i,
17157) {
17158 static_assert_imm8_scale!(SCALE);
17159 let src: i64x8 = src.as_i64x8();
17160 let mask: i8 = mask as i8;
17161 let slice: *mut i8 = slice as *mut i8;
17162 let offsets: i64x8 = offsets.as_i64x8();
17163 vpscatterqq(slice, mask, offsets, src, SCALE);
17164}
17165
17166/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17167///
17168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi32&expand=3032)
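///
/// A minimal usage sketch (an illustrative addition; `demo` and `out` are
/// hypothetical and an AVX-512F target is assumed):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo(out: &mut [i32; 16]) {
///     use core::arch::x86_64::*;
///     let idx = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     let src = _mm512_set1_epi32(7);
///     // fills out[0] through out[15] with 7; SCALE = 4 (each i32 is 4 bytes wide)
///     unsafe { _mm512_i32scatter_epi32::<4>(out.as_mut_ptr(), idx, src) };
/// }
/// ```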
17169#[inline]
17170#[target_feature(enable = "avx512f")]
17171#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17172#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17173#[rustc_legacy_const_generics(3)]
17174pub unsafe fn _mm512_i32scatter_epi32<const SCALE: i32>(
17175 slice: *mut i32,
17176 offsets: __m512i,
17177 src: __m512i,
17178) {
17179 static_assert_imm8_scale!(SCALE);
17180 let src: i32x16 = src.as_i32x16();
17181 let neg_one: i16 = -1;
17182 let slice: *mut i8 = slice as *mut i8;
17183 let offsets: i32x16 = offsets.as_i32x16();
    vpscatterdd(slice, neg_one, offsets, src, SCALE);
17185}
17186
17187/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17188///
17189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi32&expand=3033)
17190#[inline]
17191#[target_feature(enable = "avx512f")]
17192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17193#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17194#[rustc_legacy_const_generics(4)]
17195pub unsafe fn _mm512_mask_i32scatter_epi32<const SCALE: i32>(
17196 slice: *mut i32,
17197 mask: __mmask16,
17198 offsets: __m512i,
17199 src: __m512i,
17200) {
17201 static_assert_imm8_scale!(SCALE);
17202 let src: i32x16 = src.as_i32x16();
17203 let mask: i16 = mask as i16;
17204 let slice: *mut i8 = slice as *mut i8;
17205 let offsets: i32x16 = offsets.as_i32x16();
17206 vpscatterdd(slice, mask, offsets, src, SCALE);
17207}
17208
17209/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17210///
17211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi32&expand=3108)
17212#[inline]
17213#[target_feature(enable = "avx512f")]
17214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17215#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17216#[rustc_legacy_const_generics(3)]
17217pub unsafe fn _mm512_i64scatter_epi32<const SCALE: i32>(
17218 slice: *mut i32,
17219 offsets: __m512i,
17220 src: __m256i,
17221) {
17222 static_assert_imm8_scale!(SCALE);
17223 let src: i32x8 = src.as_i32x8();
17224 let neg_one: i8 = -1;
17225 let slice: *mut i8 = slice as *mut i8;
17226 let offsets: i64x8 = offsets.as_i64x8();
    vpscatterqd(slice, neg_one, offsets, src, SCALE);
17228}
17229
17230/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17231///
17232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi32&expand=3109)
17233#[inline]
17234#[target_feature(enable = "avx512f")]
17235#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17236#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17237#[rustc_legacy_const_generics(4)]
17238pub unsafe fn _mm512_mask_i64scatter_epi32<const SCALE: i32>(
17239 slice: *mut i32,
17240 mask: __mmask8,
17241 offsets: __m512i,
17242 src: __m256i,
17243) {
17244 static_assert_imm8_scale!(SCALE);
17245 let src: i32x8 = src.as_i32x8();
17246 let mask: i8 = mask as i8;
17247 let slice: *mut i8 = slice as *mut i8;
17248 let offsets: i64x8 = offsets.as_i64x8();
17249 vpscatterqd(slice, mask, offsets, src, SCALE);
17250}
17251
17252/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17253/// indices stored in the lower half of vindex scaled by scale and stores them in dst.
17254///
17255/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
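///
/// # Examples
///
/// A minimal sketch (fenced as `ignore`, not run as a doctest); it assumes `avx512f` is
/// available. Only the eight indices in the lower 256 bits of `vindex` participate.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let table: [i64; 8] = [10, 11, 12, 13, 14, 15, 16, 17];
/// // The upper eight 32-bit lanes of vindex are ignored.
/// let vindex = _mm512_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0);
/// let r = unsafe { _mm512_i32logather_epi64::<8>(vindex, table.as_ptr()) };
/// // r now holds [17, 16, 15, 14, 13, 12, 11, 10].
/// ```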
17256#[inline]
17257#[target_feature(enable = "avx512f")]
17258#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17259#[rustc_legacy_const_generics(2)]
17260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17261pub unsafe fn _mm512_i32logather_epi64<const SCALE: i32>(
17262 vindex: __m512i,
17263 base_addr: *const i64,
17264) -> __m512i {
    _mm512_i32gather_epi64::<SCALE>(_mm512_castsi512_si256(vindex), base_addr)
17266}
17267
17268/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17269/// indices stored in the lower half of vindex scaled by scale and stores them in dst using writemask k
17270/// (elements are copied from src when the corresponding mask bit is not set).
17271///
17272/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
17273#[inline]
17274#[target_feature(enable = "avx512f")]
17275#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17276#[rustc_legacy_const_generics(4)]
17277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17278pub unsafe fn _mm512_mask_i32logather_epi64<const SCALE: i32>(
17279 src: __m512i,
17280 k: __mmask8,
17281 vindex: __m512i,
17282 base_addr: *const i64,
17283) -> __m512i {
    _mm512_mask_i32gather_epi64::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr)
17285}
17286
17287/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17288/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst.
17289///
17290/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
17291#[inline]
17292#[target_feature(enable = "avx512f")]
17293#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17294#[rustc_legacy_const_generics(2)]
17295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17296pub unsafe fn _mm512_i32logather_pd<const SCALE: i32>(
17297 vindex: __m512i,
17298 base_addr: *const f64,
17299) -> __m512d {
    _mm512_i32gather_pd::<SCALE>(_mm512_castsi512_si256(vindex), base_addr)
17301}
17302
17303/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17304/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst
17305/// using writemask k (elements are copied from src when the corresponding mask bit is not set).
17306///
17307/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
17308#[inline]
17309#[target_feature(enable = "avx512f")]
17310#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17311#[rustc_legacy_const_generics(4)]
17312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17313pub unsafe fn _mm512_mask_i32logather_pd<const SCALE: i32>(
17314 src: __m512d,
17315 k: __mmask8,
17316 vindex: __m512i,
17317 base_addr: *const f64,
17318) -> __m512d {
    _mm512_mask_i32gather_pd::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr)
17320}
17321
17322/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17323/// indices stored in the lower half of vindex scaled by scale.
17324///
17325/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
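///
/// # Examples
///
/// A minimal sketch (fenced as `ignore`, not run as a doctest); it assumes `avx512f` is
/// available and the destination buffer covers every scaled index.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let mut dst = [0i64; 8];
/// // Only the lower eight 32-bit indices of vindex are used.
/// let vindex = _mm512_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0);
/// let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
/// unsafe {
///     _mm512_i32loscatter_epi64::<8>(dst.as_mut_ptr(), vindex, a);
/// }
/// assert_eq!(dst, [7, 6, 5, 4, 3, 2, 1, 0]);
/// ```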
17326#[inline]
17327#[target_feature(enable = "avx512f")]
17328#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17329#[rustc_legacy_const_generics(3)]
17330#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17331pub unsafe fn _mm512_i32loscatter_epi64<const SCALE: i32>(
17332 base_addr: *mut i64,
17333 vindex: __m512i,
17334 a: __m512i,
17335) {
    _mm512_i32scatter_epi64::<SCALE>(base_addr, _mm512_castsi512_si256(vindex), a)
17337}
17338
17339/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17340/// indices stored in the lower half of vindex scaled by scale using writemask k (elements whose corresponding
17341/// mask bit is not set are not written to memory).
17342///
17343/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
17344#[inline]
17345#[target_feature(enable = "avx512f")]
17346#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17347#[rustc_legacy_const_generics(4)]
17348#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17349pub unsafe fn _mm512_mask_i32loscatter_epi64<const SCALE: i32>(
17350 base_addr: *mut i64,
17351 k: __mmask8,
17352 vindex: __m512i,
17353 a: __m512i,
17354) {
    _mm512_mask_i32scatter_epi64::<SCALE>(base_addr, k, _mm512_castsi512_si256(vindex), a)
17356}
17357
17358/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17359/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale.
17360///
17361/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
17362#[inline]
17363#[target_feature(enable = "avx512f")]
17364#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17365#[rustc_legacy_const_generics(3)]
17366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17367pub unsafe fn _mm512_i32loscatter_pd<const SCALE: i32>(
17368 base_addr: *mut f64,
17369 vindex: __m512i,
17370 a: __m512d,
17371) {
    _mm512_i32scatter_pd::<SCALE>(base_addr, _mm512_castsi512_si256(vindex), a)
17373}
17374
17375/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17376/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale using writemask k
17377/// (elements whose corresponding mask bit is not set are not written to memory).
17378///
17379/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd)
17380#[inline]
17381#[target_feature(enable = "avx512f")]
17382#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17383#[rustc_legacy_const_generics(4)]
17384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17385pub unsafe fn _mm512_mask_i32loscatter_pd<const SCALE: i32>(
17386 base_addr: *mut f64,
17387 k: __mmask8,
17388 vindex: __m512i,
17389 a: __m512d,
17390) {
    _mm512_mask_i32scatter_pd::<SCALE>(base_addr, k, _mm512_castsi512_si256(vindex), a)
17392}
17393
17394/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17395/// indices stored in vindex scaled by scale
17396///
17397/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_epi32)
17398#[inline]
17399#[target_feature(enable = "avx512f,avx512vl")]
17400#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17401#[rustc_legacy_const_generics(3)]
17402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17403pub unsafe fn _mm256_i32scatter_epi32<const SCALE: i32>(
17404 base_addr: *mut i32,
17405 vindex: __m256i,
17406 a: __m256i,
17407) {
17408 static_assert_imm8_scale!(SCALE);
    vpscatterdd_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_i32x8(), SCALE)
17410}
17411
17412/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17413/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17414/// are not written to memory).
17415///
17416/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi32)
17417#[inline]
17418#[target_feature(enable = "avx512f,avx512vl")]
17419#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17420#[rustc_legacy_const_generics(4)]
17421#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17422pub unsafe fn _mm256_mask_i32scatter_epi32<const SCALE: i32>(
17423 base_addr: *mut i32,
17424 k: __mmask8,
17425 vindex: __m256i,
17426 a: __m256i,
17427) {
17428 static_assert_imm8_scale!(SCALE);
    vpscatterdd_256(base_addr as _, k, vindex.as_i32x8(), a.as_i32x8(), SCALE)
17430}
17431
17432/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17433///
17434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099)
17435#[inline]
17436#[target_feature(enable = "avx512f,avx512vl")]
17437#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17438#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17439#[rustc_legacy_const_generics(3)]
17440pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
17441 slice: *mut i64,
17442 offsets: __m128i,
17443 src: __m256i,
17444) {
17445 static_assert_imm8_scale!(SCALE);
17446 let src: i64x4 = src.as_i64x4();
17447 let slice: *mut i8 = slice as *mut i8;
17448 let offsets: i32x4 = offsets.as_i32x4();
    vpscatterdq_256(slice, 0xff, offsets, src, SCALE);
17450}
17451
17452/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17453/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17454/// are not written to memory).
17455///
17456/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi64)
17457#[inline]
17458#[target_feature(enable = "avx512f,avx512vl")]
17459#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17460#[rustc_legacy_const_generics(4)]
17461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17462pub unsafe fn _mm256_mask_i32scatter_epi64<const SCALE: i32>(
17463 base_addr: *mut i64,
17464 k: __mmask8,
17465 vindex: __m128i,
17466 a: __m256i,
17467) {
17468 static_assert_imm8_scale!(SCALE);
    vpscatterdq_256(base_addr as _, k, vindex.as_i32x4(), a.as_i64x4(), SCALE)
17470}
17471
17472/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17473/// at packed 32-bit integer indices stored in vindex scaled by scale
17474///
17475/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_pd)
17476#[inline]
17477#[target_feature(enable = "avx512f,avx512vl")]
17478#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17479#[rustc_legacy_const_generics(3)]
17480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17481pub unsafe fn _mm256_i32scatter_pd<const SCALE: i32>(
17482 base_addr: *mut f64,
17483 vindex: __m128i,
17484 a: __m256d,
17485) {
17486 static_assert_imm8_scale!(SCALE);
    vscatterdpd_256(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x4(), SCALE)
17488}
17489
17490/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17491/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17492/// mask bit is not set are not written to memory).
17493///
17494/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_pd)
17495#[inline]
17496#[target_feature(enable = "avx512f,avx512vl")]
17497#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17498#[rustc_legacy_const_generics(4)]
17499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17500pub unsafe fn _mm256_mask_i32scatter_pd<const SCALE: i32>(
17501 base_addr: *mut f64,
17502 k: __mmask8,
17503 vindex: __m128i,
17504 a: __m256d,
17505) {
17506 static_assert_imm8_scale!(SCALE);
    vscatterdpd_256(base_addr as _, k, vindex.as_i32x4(), a.as_f64x4(), SCALE)
17508}
17509
17510/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17511/// at packed 32-bit integer indices stored in vindex scaled by scale
17512///
17513/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_ps)
17514#[inline]
17515#[target_feature(enable = "avx512f,avx512vl")]
17516#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17517#[rustc_legacy_const_generics(3)]
17518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17519pub unsafe fn _mm256_i32scatter_ps<const SCALE: i32>(
17520 base_addr: *mut f32,
17521 vindex: __m256i,
17522 a: __m256,
17523) {
17524 static_assert_imm8_scale!(SCALE);
    vscatterdps_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_f32x8(), SCALE)
17526}
17527
17528/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17529/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17530/// mask bit is not set are not written to memory).
17531///
17532/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_ps)
17533#[inline]
17534#[target_feature(enable = "avx512f,avx512vl")]
17535#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17536#[rustc_legacy_const_generics(4)]
17537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17538pub unsafe fn _mm256_mask_i32scatter_ps<const SCALE: i32>(
17539 base_addr: *mut f32,
17540 k: __mmask8,
17541 vindex: __m256i,
17542 a: __m256,
17543) {
17544 static_assert_imm8_scale!(SCALE);
    vscatterdps_256(base_addr as _, k, vindex.as_i32x8(), a.as_f32x8(), SCALE)
17546}
17547
17548/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17549/// indices stored in vindex scaled by scale
17550///
17551/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi32)
17552#[inline]
17553#[target_feature(enable = "avx512f,avx512vl")]
17554#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17555#[rustc_legacy_const_generics(3)]
17556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17557pub unsafe fn _mm256_i64scatter_epi32<const SCALE: i32>(
17558 base_addr: *mut i32,
17559 vindex: __m256i,
17560 a: __m128i,
17561) {
17562 static_assert_imm8_scale!(SCALE);
    vpscatterqd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i32x4(), SCALE)
17564}
17565
17566/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17567/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17568/// are not written to memory).
17569///
17570/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi32)
17571#[inline]
17572#[target_feature(enable = "avx512f,avx512vl")]
17573#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17574#[rustc_legacy_const_generics(4)]
17575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17576pub unsafe fn _mm256_mask_i64scatter_epi32<const SCALE: i32>(
17577 base_addr: *mut i32,
17578 k: __mmask8,
17579 vindex: __m256i,
17580 a: __m128i,
17581) {
17582 static_assert_imm8_scale!(SCALE);
    vpscatterqd_256(base_addr as _, k, vindex.as_i64x4(), a.as_i32x4(), SCALE)
17584}
17585
17586/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17587/// indices stored in vindex scaled by scale
17588///
17589/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi64)
17590#[inline]
17591#[target_feature(enable = "avx512f,avx512vl")]
17592#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17593#[rustc_legacy_const_generics(3)]
17594#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17595pub unsafe fn _mm256_i64scatter_epi64<const SCALE: i32>(
17596 base_addr: *mut i64,
17597 vindex: __m256i,
17598 a: __m256i,
17599) {
17600 static_assert_imm8_scale!(SCALE);
    vpscatterqq_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i64x4(), SCALE)
17602}
17603
17604/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17605/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17606/// are not written to memory).
17607///
17608/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi64)
17609#[inline]
17610#[target_feature(enable = "avx512f,avx512vl")]
17611#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17612#[rustc_legacy_const_generics(4)]
17613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17614pub unsafe fn _mm256_mask_i64scatter_epi64<const SCALE: i32>(
17615 base_addr: *mut i64,
17616 k: __mmask8,
17617 vindex: __m256i,
17618 a: __m256i,
17619) {
17620 static_assert_imm8_scale!(SCALE);
    vpscatterqq_256(base_addr as _, k, vindex.as_i64x4(), a.as_i64x4(), SCALE)
17622}
17623
17624/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17625/// at packed 64-bit integer indices stored in vindex scaled by scale
17626///
17627/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_pd)
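///
/// # Examples
///
/// A minimal sketch (fenced as `ignore`, not run as a doctest); it assumes `avx512f` and
/// `avx512vl` are available.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let mut dst = [0.0f64; 4];
/// let vindex = _mm256_setr_epi64x(0, 1, 2, 3);
/// let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
/// unsafe {
///     // SCALE = 8 because each double is 8 bytes wide.
///     _mm256_i64scatter_pd::<8>(dst.as_mut_ptr(), vindex, a);
/// }
/// assert_eq!(dst, [1.0, 2.0, 3.0, 4.0]);
/// ```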
17628#[inline]
17629#[target_feature(enable = "avx512f,avx512vl")]
17630#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17631#[rustc_legacy_const_generics(3)]
17632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17633pub unsafe fn _mm256_i64scatter_pd<const SCALE: i32>(
17634 base_addr: *mut f64,
17635 vindex: __m256i,
17636 a: __m256d,
17637) {
17638 static_assert_imm8_scale!(SCALE);
    vscatterqpd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f64x4(), SCALE)
17640}
17641
17642/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17643/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17644/// mask bit is not set are not written to memory).
17645///
17646/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_pd)
17647#[inline]
17648#[target_feature(enable = "avx512f,avx512vl")]
17649#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17650#[rustc_legacy_const_generics(4)]
17651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17652pub unsafe fn _mm256_mask_i64scatter_pd<const SCALE: i32>(
17653 base_addr: *mut f64,
17654 k: __mmask8,
17655 vindex: __m256i,
17656 a: __m256d,
17657) {
17658 static_assert_imm8_scale!(SCALE);
    vscatterqpd_256(base_addr as _, k, vindex.as_i64x4(), a.as_f64x4(), SCALE)
17660}
17661
17662/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17663/// at packed 64-bit integer indices stored in vindex scaled by scale
17664///
17665/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_ps)
17666#[inline]
17667#[target_feature(enable = "avx512f,avx512vl")]
17668#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17669#[rustc_legacy_const_generics(3)]
17670#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17671pub unsafe fn _mm256_i64scatter_ps<const SCALE: i32>(
17672 base_addr: *mut f32,
17673 vindex: __m256i,
17674 a: __m128,
17675) {
17676 static_assert_imm8_scale!(SCALE);
    vscatterqps_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f32x4(), SCALE)
17678}
17679
17680/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17681/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17682/// mask bit is not set are not written to memory).
17683///
17684/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_ps)
17685#[inline]
17686#[target_feature(enable = "avx512f,avx512vl")]
17687#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17688#[rustc_legacy_const_generics(4)]
17689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17690pub unsafe fn _mm256_mask_i64scatter_ps<const SCALE: i32>(
17691 base_addr: *mut f32,
17692 k: __mmask8,
17693 vindex: __m256i,
17694 a: __m128,
17695) {
17696 static_assert_imm8_scale!(SCALE);
    vscatterqps_256(base_addr as _, k, vindex.as_i64x4(), a.as_f32x4(), SCALE)
17698}
17699
17700/// Loads 8 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17701/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17702/// mask bit is not set).
17703///
17704/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi32)
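///
/// # Examples
///
/// A minimal sketch (fenced as `ignore`, not run as a doctest); it assumes `avx512f` and
/// `avx512vl` are available.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let table: [i32; 8] = [10, 20, 30, 40, 50, 60, 70, 80];
/// let src = _mm256_set1_epi32(-1);
/// let vindex = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// // Only the four lanes selected by the mask are gathered; the rest come from src.
/// let r = unsafe { _mm256_mmask_i32gather_epi32::<4>(src, 0b0000_1111, vindex, table.as_ptr()) };
/// // r now holds [10, 20, 30, 40, -1, -1, -1, -1].
/// ```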
17705#[inline]
17706#[target_feature(enable = "avx512f,avx512vl")]
17707#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
17708#[rustc_legacy_const_generics(4)]
17709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17710pub unsafe fn _mm256_mmask_i32gather_epi32<const SCALE: i32>(
17711 src: __m256i,
17712 k: __mmask8,
17713 vindex: __m256i,
17714 base_addr: *const i32,
17715) -> __m256i {
17716 static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdd_256(
        src.as_i32x8(),
        base_addr as _,
        vindex.as_i32x8(),
        k,
        SCALE,
    ))
17724}
17725
17726/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17727/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17728/// mask bit is not set).
17729///
17730/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi64)
17731#[inline]
17732#[target_feature(enable = "avx512f,avx512vl")]
17733#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17734#[rustc_legacy_const_generics(4)]
17735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17736pub unsafe fn _mm256_mmask_i32gather_epi64<const SCALE: i32>(
17737 src: __m256i,
17738 k: __mmask8,
17739 vindex: __m128i,
17740 base_addr: *const i64,
17741) -> __m256i {
17742 static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdq_256(
        src.as_i64x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
17750}
17751
17752/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17753/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17754/// from src when the corresponding mask bit is not set).
17755///
17756/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_pd)
17757#[inline]
17758#[target_feature(enable = "avx512f,avx512vl")]
17759#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17760#[rustc_legacy_const_generics(4)]
17761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17762pub unsafe fn _mm256_mmask_i32gather_pd<const SCALE: i32>(
17763 src: __m256d,
17764 k: __mmask8,
17765 vindex: __m128i,
17766 base_addr: *const f64,
17767) -> __m256d {
17768 static_assert_imm8_scale!(SCALE);
    transmute(vgatherdpd_256(
        src.as_f64x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
17776}
17777
17778/// Loads 8 single-precision (32-bit) floating-point elements from memory starting at location base_addr
17779/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17780/// from src when the corresponding mask bit is not set).
17781///
17782/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_ps)
17783#[inline]
17784#[target_feature(enable = "avx512f,avx512vl")]
17785#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
17786#[rustc_legacy_const_generics(4)]
17787#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17788pub unsafe fn _mm256_mmask_i32gather_ps<const SCALE: i32>(
17789 src: __m256,
17790 k: __mmask8,
17791 vindex: __m256i,
17792 base_addr: *const f32,
17793) -> __m256 {
17794 static_assert_imm8_scale!(SCALE);
    transmute(vgatherdps_256(
        src.as_f32x8(),
        base_addr as _,
        vindex.as_i32x8(),
        k,
        SCALE,
    ))
17802}
17803
17804/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
17805/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17806/// mask bit is not set).
17807///
17808/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi32)
17809#[inline]
17810#[target_feature(enable = "avx512f,avx512vl")]
17811#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
17812#[rustc_legacy_const_generics(4)]
17813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17814pub unsafe fn _mm256_mmask_i64gather_epi32<const SCALE: i32>(
17815 src: __m128i,
17816 k: __mmask8,
17817 vindex: __m256i,
17818 base_addr: *const i32,
17819) -> __m128i {
17820 static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqd_256(
        src.as_i32x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
17828}
17829
17830/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17831/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17832/// mask bit is not set).
17833///
17834/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64)
17835#[inline]
17836#[target_feature(enable = "avx512f,avx512vl")]
17837#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
17838#[rustc_legacy_const_generics(4)]
17839#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17840pub unsafe fn _mm256_mmask_i64gather_epi64<const SCALE: i32>(
17841 src: __m256i,
17842 k: __mmask8,
17843 vindex: __m256i,
17844 base_addr: *const i64,
17845) -> __m256i {
17846 static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqq_256(
        src.as_i64x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
17854}
17855
17856/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17857/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17858/// from src when the corresponding mask bit is not set).
17859///
17860/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd)
17861#[inline]
17862#[target_feature(enable = "avx512f,avx512vl")]
17863#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
17864#[rustc_legacy_const_generics(4)]
17865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17866pub unsafe fn _mm256_mmask_i64gather_pd<const SCALE: i32>(
17867 src: __m256d,
17868 k: __mmask8,
17869 vindex: __m256i,
17870 base_addr: *const f64,
17871) -> __m256d {
17872 static_assert_imm8_scale!(SCALE);
    transmute(vgatherqpd_256(
        src.as_f64x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
17880}
17881
17882/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
17883/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17884/// from src when the corresponding mask bit is not set).
17885///
17886/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps)
17887#[inline]
17888#[target_feature(enable = "avx512f,avx512vl")]
17889#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
17890#[rustc_legacy_const_generics(4)]
17891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17892pub unsafe fn _mm256_mmask_i64gather_ps<const SCALE: i32>(
17893 src: __m128,
17894 k: __mmask8,
17895 vindex: __m256i,
17896 base_addr: *const f32,
17897) -> __m128 {
17898 static_assert_imm8_scale!(SCALE);
    transmute(vgatherqps_256(
        src.as_f32x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
17906}
17907
17908/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17909/// indices stored in vindex scaled by scale
17910///
17911/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32)
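///
/// # Examples
///
/// A minimal sketch (fenced as `ignore`, not run as a doctest); it assumes `avx512f` and
/// `avx512vl` are available.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let mut dst = [0i32; 4];
/// let vindex = _mm_setr_epi32(3, 2, 1, 0);
/// let a = _mm_setr_epi32(10, 20, 30, 40);
/// unsafe {
///     // SCALE = 4 because each stored element is 4 bytes wide.
///     _mm_i32scatter_epi32::<4>(dst.as_mut_ptr(), vindex, a);
/// }
/// assert_eq!(dst, [40, 30, 20, 10]);
/// ```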
17912#[inline]
17913#[target_feature(enable = "avx512f,avx512vl")]
17914#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17915#[rustc_legacy_const_generics(3)]
17916#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17917pub unsafe fn _mm_i32scatter_epi32<const SCALE: i32>(
17918 base_addr: *mut i32,
17919 vindex: __m128i,
17920 a: __m128i,
17921) {
17922 static_assert_imm8_scale!(SCALE);
    vpscatterdd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i32x4(), SCALE)
17924}
17925
17926/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17927/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17928/// are not written to memory).
17929///
17930/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32)
17931#[inline]
17932#[target_feature(enable = "avx512f,avx512vl")]
17933#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17934#[rustc_legacy_const_generics(4)]
17935#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17936pub unsafe fn _mm_mask_i32scatter_epi32<const SCALE: i32>(
17937 base_addr: *mut i32,
17938 k: __mmask8,
17939 vindex: __m128i,
17940 a: __m128i,
17941) {
17942 static_assert_imm8_scale!(SCALE);
    vpscatterdd_128(base_addr as _, k, vindex.as_i32x4(), a.as_i32x4(), SCALE)
17944}
17945
17946/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17947/// indices stored in vindex scaled by scale
17948///
17949/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64)
17950#[inline]
17951#[target_feature(enable = "avx512f,avx512vl")]
17952#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17953#[rustc_legacy_const_generics(3)]
17954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17955pub unsafe fn _mm_i32scatter_epi64<const SCALE: i32>(
17956 base_addr: *mut i64,
17957 vindex: __m128i,
17958 a: __m128i,
17959) {
17960 static_assert_imm8_scale!(SCALE);
    vpscatterdq_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i64x2(), SCALE)
17962}
17963
17964/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17965/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17966/// are not written to memory).
17967///
17968/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64)
17969#[inline]
17970#[target_feature(enable = "avx512f,avx512vl")]
17971#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17972#[rustc_legacy_const_generics(4)]
17973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17974pub unsafe fn _mm_mask_i32scatter_epi64<const SCALE: i32>(
17975 base_addr: *mut i64,
17976 k: __mmask8,
17977 vindex: __m128i,
17978 a: __m128i,
17979) {
17980 static_assert_imm8_scale!(SCALE);
    vpscatterdq_128(base_addr as _, k, vindex.as_i32x4(), a.as_i64x2(), SCALE)
17982}
17983
17984/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17985/// at packed 32-bit integer indices stored in vindex scaled by scale
17986///
17987/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd)
17988#[inline]
17989#[target_feature(enable = "avx512f,avx512vl")]
17990#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17991#[rustc_legacy_const_generics(3)]
17992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17993pub unsafe fn _mm_i32scatter_pd<const SCALE: i32>(
17994 base_addr: *mut f64,
17995 vindex: __m128i,
17996 a: __m128d,
17997) {
17998 static_assert_imm8_scale!(SCALE);
    vscatterdpd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x2(), SCALE)
18000}
18001
18002/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18003/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18004/// mask bit is not set are not written to memory).
18005///
18006/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd)
18007#[inline]
18008#[target_feature(enable = "avx512f,avx512vl")]
18009#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
18010#[rustc_legacy_const_generics(4)]
18011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18012pub unsafe fn _mm_mask_i32scatter_pd<const SCALE: i32>(
18013 base_addr: *mut f64,
18014 k: __mmask8,
18015 vindex: __m128i,
18016 a: __m128d,
18017) {
18018 static_assert_imm8_scale!(SCALE);
    vscatterdpd_128(base_addr as _, k, vindex.as_i32x4(), a.as_f64x2(), SCALE)
18020}
18021
18022/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18023/// at packed 32-bit integer indices stored in vindex scaled by scale
18024///
18025/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_ps)
18026#[inline]
18027#[target_feature(enable = "avx512f,avx512vl")]
18028#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
18029#[rustc_legacy_const_generics(3)]
18030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18031pub unsafe fn _mm_i32scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
18032 static_assert_imm8_scale!(SCALE);
    vscatterdps_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f32x4(), SCALE)
18034}
18035
18036/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18037/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18038/// mask bit is not set are not written to memory).
18039///
18040/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_ps)
18041#[inline]
18042#[target_feature(enable = "avx512f,avx512vl")]
18043#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
18044#[rustc_legacy_const_generics(4)]
18045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18046pub unsafe fn _mm_mask_i32scatter_ps<const SCALE: i32>(
18047 base_addr: *mut f32,
18048 k: __mmask8,
18049 vindex: __m128i,
18050 a: __m128,
18051) {
18052 static_assert_imm8_scale!(SCALE);
    vscatterdps_128(base_addr as _, k, vindex.as_i32x4(), a.as_f32x4(), SCALE)
18054}
18055
18056/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18057/// indices stored in vindex scaled by scale
18058///
18059/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi32)
18060#[inline]
18061#[target_feature(enable = "avx512f,avx512vl")]
18062#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
18063#[rustc_legacy_const_generics(3)]
18064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18065pub unsafe fn _mm_i64scatter_epi32<const SCALE: i32>(
18066 base_addr: *mut i32,
18067 vindex: __m128i,
18068 a: __m128i,
18069) {
18070 static_assert_imm8_scale!(SCALE);
    vpscatterqd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i32x4(), SCALE)
18072}
18073
18074/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18075/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18076/// are not written to memory).
18077///
18078/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi32)
18079#[inline]
18080#[target_feature(enable = "avx512f,avx512vl")]
18081#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
18082#[rustc_legacy_const_generics(4)]
18083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18084pub unsafe fn _mm_mask_i64scatter_epi32<const SCALE: i32>(
18085 base_addr: *mut i32,
18086 k: __mmask8,
18087 vindex: __m128i,
18088 a: __m128i,
18089) {
18090 static_assert_imm8_scale!(SCALE);
    vpscatterqd_128(base_addr as _, k, vindex.as_i64x2(), a.as_i32x4(), SCALE)
18092}
18093
18094/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18095/// indices stored in vindex scaled by scale
18096///
18097/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64)
18098#[inline]
18099#[target_feature(enable = "avx512f,avx512vl")]
18100#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18101#[rustc_legacy_const_generics(3)]
18102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18103pub unsafe fn _mm_i64scatter_epi64<const SCALE: i32>(
18104 base_addr: *mut i64,
18105 vindex: __m128i,
18106 a: __m128i,
18107) {
18108 static_assert_imm8_scale!(SCALE);
    vpscatterqq_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i64x2(), SCALE)
18110}
18111
18112/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18113/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18114/// are not written to memory).
18115///
18116/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64)
18117#[inline]
18118#[target_feature(enable = "avx512f,avx512vl")]
18119#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18120#[rustc_legacy_const_generics(4)]
18121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18122pub unsafe fn _mm_mask_i64scatter_epi64<const SCALE: i32>(
18123 base_addr: *mut i64,
18124 k: __mmask8,
18125 vindex: __m128i,
18126 a: __m128i,
18127) {
18128 static_assert_imm8_scale!(SCALE);
    vpscatterqq_128(base_addr as _, k, vindex.as_i64x2(), a.as_i64x2(), SCALE)
18130}
18131
18132/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18133/// at packed 64-bit integer indices stored in vindex scaled by scale
18134///
18135/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd)
18136#[inline]
18137#[target_feature(enable = "avx512f,avx512vl")]
18138#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18139#[rustc_legacy_const_generics(3)]
18140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18141pub unsafe fn _mm_i64scatter_pd<const SCALE: i32>(
18142 base_addr: *mut f64,
18143 vindex: __m128i,
18144 a: __m128d,
18145) {
18146 static_assert_imm8_scale!(SCALE);
    vscatterqpd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f64x2(), SCALE)
18148}
18149
18150/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18151/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18152/// mask bit is not set are not written to memory).
18153///
18154/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd)
18155#[inline]
18156#[target_feature(enable = "avx512f,avx512vl")]
18157#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18158#[rustc_legacy_const_generics(4)]
18159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18160pub unsafe fn _mm_mask_i64scatter_pd<const SCALE: i32>(
18161 base_addr: *mut f64,
18162 k: __mmask8,
18163 vindex: __m128i,
18164 a: __m128d,
18165) {
18166 static_assert_imm8_scale!(SCALE);
    vscatterqpd_128(base_addr as _, k, vindex.as_i64x2(), a.as_f64x2(), SCALE)
18168}
18169
18170/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18171/// at packed 64-bit integer indices stored in vindex scaled by scale
18172///
18173/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps)
18174#[inline]
18175#[target_feature(enable = "avx512f,avx512vl")]
18176#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18177#[rustc_legacy_const_generics(3)]
18178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18179pub unsafe fn _mm_i64scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
18180 static_assert_imm8_scale!(SCALE);
    vscatterqps_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f32x4(), SCALE)
18182}
18183
18184/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
18187/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps)
18188#[inline]
18189#[target_feature(enable = "avx512f,avx512vl")]
18190#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18191#[rustc_legacy_const_generics(4)]
18192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18193pub unsafe fn _mm_mask_i64scatter_ps<const SCALE: i32>(
18194 base_addr: *mut f32,
18195 k: __mmask8,
18196 vindex: __m128i,
18197 a: __m128,
18198) {
18199 static_assert_imm8_scale!(SCALE);
    vscatterqps_128(base_addr as _, k, vindex.as_i64x2(), a.as_f32x4(), SCALE)
18201}
18202
18203/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18204/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18205/// mask bit is not set).
18206///
18207/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32)
18208#[inline]
18209#[target_feature(enable = "avx512f,avx512vl")]
18210#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
18211#[rustc_legacy_const_generics(4)]
18212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18213pub unsafe fn _mm_mmask_i32gather_epi32<const SCALE: i32>(
18214 src: __m128i,
18215 k: __mmask8,
18216 vindex: __m128i,
18217 base_addr: *const i32,
18218) -> __m128i {
18219 static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdd_128(
        src.as_i32x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
18227}
18228
18229/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18230/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18231/// mask bit is not set).
18232///
18233/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64)
18234#[inline]
18235#[target_feature(enable = "avx512f,avx512vl")]
18236#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
18237#[rustc_legacy_const_generics(4)]
18238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18239pub unsafe fn _mm_mmask_i32gather_epi64<const SCALE: i32>(
18240 src: __m128i,
18241 k: __mmask8,
18242 vindex: __m128i,
18243 base_addr: *const i64,
18244) -> __m128i {
18245 static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdq_128(
        src.as_i64x2(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
18253}
18254
18255/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18256/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18257/// from src when the corresponding mask bit is not set).
18258///
18259/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd)
18260#[inline]
18261#[target_feature(enable = "avx512f,avx512vl")]
18262#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
18263#[rustc_legacy_const_generics(4)]
18264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18265pub unsafe fn _mm_mmask_i32gather_pd<const SCALE: i32>(
18266 src: __m128d,
18267 k: __mmask8,
18268 vindex: __m128i,
18269 base_addr: *const f64,
18270) -> __m128d {
18271 static_assert_imm8_scale!(SCALE);
    transmute(vgatherdpd_128(
        src.as_f64x2(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
18279}
18280
18281/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18282/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18283/// from src when the corresponding mask bit is not set).
18284///
18285/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_ps)
18286#[inline]
18287#[target_feature(enable = "avx512f,avx512vl")]
18288#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
18289#[rustc_legacy_const_generics(4)]
18290#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18291pub unsafe fn _mm_mmask_i32gather_ps<const SCALE: i32>(
18292 src: __m128,
18293 k: __mmask8,
18294 vindex: __m128i,
18295 base_addr: *const f32,
18296) -> __m128 {
18297 static_assert_imm8_scale!(SCALE);
    transmute(vgatherdps_128(
        src.as_f32x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
18305}
18306
18307/// Loads 2 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18308/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18309/// mask bit is not set).
18310///
18311/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi32)
18312#[inline]
18313#[target_feature(enable = "avx512f,avx512vl")]
18314#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
18315#[rustc_legacy_const_generics(4)]
18316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18317pub unsafe fn _mm_mmask_i64gather_epi32<const SCALE: i32>(
18318 src: __m128i,
18319 k: __mmask8,
18320 vindex: __m128i,
18321 base_addr: *const i32,
18322) -> __m128i {
18323 static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqd_128(
        src.as_i32x4(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
18331}
18332
18333/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18334/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18335/// mask bit is not set).
18336///
18337/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi64)
18338#[inline]
18339#[target_feature(enable = "avx512f,avx512vl")]
18340#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
18341#[rustc_legacy_const_generics(4)]
18342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18343pub unsafe fn _mm_mmask_i64gather_epi64<const SCALE: i32>(
18344 src: __m128i,
18345 k: __mmask8,
18346 vindex: __m128i,
18347 base_addr: *const i64,
18348) -> __m128i {
18349 static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqq_128(
        src.as_i64x2(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
18357}
18358
18359/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18360/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18361/// from src when the corresponding mask bit is not set).
18362///
18363/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_pd)
18364#[inline]
18365#[target_feature(enable = "avx512f,avx512vl")]
18366#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
18367#[rustc_legacy_const_generics(4)]
18368#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18369pub unsafe fn _mm_mmask_i64gather_pd<const SCALE: i32>(
18370 src: __m128d,
18371 k: __mmask8,
18372 vindex: __m128i,
18373 base_addr: *const f64,
18374) -> __m128d {
18375 static_assert_imm8_scale!(SCALE);
    transmute(vgatherqpd_128(
        src.as_f64x2(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
18383}
18384
18385/// Loads 2 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18386/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18387/// from src when the corresponding mask bit is not set).
18388///
18389/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_ps)
18390#[inline]
18391#[target_feature(enable = "avx512f,avx512vl")]
18392#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
18393#[rustc_legacy_const_generics(4)]
18394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18395pub unsafe fn _mm_mmask_i64gather_ps<const SCALE: i32>(
18396 src: __m128,
18397 k: __mmask8,
18398 vindex: __m128i,
18399 base_addr: *const f32,
18400) -> __m128 {
18401 static_assert_imm8_scale!(SCALE);
    transmute(vgatherqps_128(
        src.as_f32x4(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
18409}
18410
18411/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18412///
18413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi32&expand=1198)
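///
/// # Examples
///
/// A minimal sketch (fenced as `ignore`, not run as a doctest); it assumes `avx512f` is
/// available.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let src = _mm512_set1_epi32(-1);
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// // The eight active (even-numbered) lanes are packed into the low elements of the
/// // result; the remaining high elements are passed through from src.
/// let r = _mm512_mask_compress_epi32(src, 0b0101_0101_0101_0101, a);
/// // r now holds [0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1].
/// ```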
18414#[inline]
18415#[target_feature(enable = "avx512f")]
18416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18417#[cfg_attr(test, assert_instr(vpcompressd))]
18418pub fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressd(a.as_i32x16(), src.as_i32x16(), k)) }
18420}
18421
18422/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18423///
18424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi32&expand=1199)
18425#[inline]
18426#[target_feature(enable = "avx512f")]
18427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18428#[cfg_attr(test, assert_instr(vpcompressd))]
18429pub fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressd(a.as_i32x16(), i32x16::ZERO, k)) }
18431}
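
// Illustrative sketch, not part of the upstream source: `_mm512_mask_compress_epi32`
// packs the selected lanes toward element 0 and fills the tail from `src`, while the
// maskz form zeroes the tail instead. The helper name `compress_epi32_example` is
// hypothetical and the code assumes an AVX-512F-capable CPU.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn compress_epi32_example() {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let src = _mm512_set1_epi32(-1);
    // Keep the even-numbered lanes: 0, 2, 4, ..., 14.
    let r = _mm512_mask_compress_epi32(src, 0b0101_0101_0101_0101, a);
    let e = _mm512_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xffff);
}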
18432
18433/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18434///
18435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi32&expand=1196)
18436#[inline]
18437#[target_feature(enable = "avx512f,avx512vl")]
18438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18439#[cfg_attr(test, assert_instr(vpcompressd))]
18440pub fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressd256(a.as_i32x8(), src.as_i32x8(), k)) }
18442}
18443
18444/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18445///
18446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi32&expand=1197)
18447#[inline]
18448#[target_feature(enable = "avx512f,avx512vl")]
18449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18450#[cfg_attr(test, assert_instr(vpcompressd))]
18451pub fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressd256(a.as_i32x8(), i32x8::ZERO, k)) }
18453}
18454
18455/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18456///
18457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi32&expand=1194)
18458#[inline]
18459#[target_feature(enable = "avx512f,avx512vl")]
18460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18461#[cfg_attr(test, assert_instr(vpcompressd))]
18462pub fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressd128(a.as_i32x4(), src.as_i32x4(), k)) }
18464}
18465
18466/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18467///
18468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi32&expand=1195)
18469#[inline]
18470#[target_feature(enable = "avx512f,avx512vl")]
18471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18472#[cfg_attr(test, assert_instr(vpcompressd))]
18473pub fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressd128(a.as_i32x4(), i32x4::ZERO, k)) }
18475}
18476
18477/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18478///
18479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi64&expand=1204)
18480#[inline]
18481#[target_feature(enable = "avx512f")]
18482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18483#[cfg_attr(test, assert_instr(vpcompressq))]
18484pub fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k)) }
18486}
18487
18488/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18489///
18490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi64&expand=1205)
18491#[inline]
18492#[target_feature(enable = "avx512f")]
18493#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18494#[cfg_attr(test, assert_instr(vpcompressq))]
18495pub fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressq(a.as_i64x8(), i64x8::ZERO, k)) }
18497}
18498
18499/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18500///
18501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi64&expand=1202)
18502#[inline]
18503#[target_feature(enable = "avx512f,avx512vl")]
18504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18505#[cfg_attr(test, assert_instr(vpcompressq))]
18506pub fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressq256(a.as_i64x4(), src.as_i64x4(), k)) }
18508}
18509
18510/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18511///
18512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi64&expand=1203)
18513#[inline]
18514#[target_feature(enable = "avx512f,avx512vl")]
18515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18516#[cfg_attr(test, assert_instr(vpcompressq))]
18517pub fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressq256(a.as_i64x4(), i64x4::ZERO, k)) }
18519}
18520
18521/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18522///
18523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi64&expand=1200)
18524#[inline]
18525#[target_feature(enable = "avx512f,avx512vl")]
18526#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18527#[cfg_attr(test, assert_instr(vpcompressq))]
18528pub fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressq128(a.as_i64x2(), src.as_i64x2(), k)) }
18530}
18531
18532/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18533///
18534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi64&expand=1201)
18535#[inline]
18536#[target_feature(enable = "avx512f,avx512vl")]
18537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18538#[cfg_attr(test, assert_instr(vpcompressq))]
18539pub fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressq128(a.as_i64x2(), i64x2::ZERO, k)) }
18541}
18542
18543/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18544///
18545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_ps&expand=1222)
18546#[inline]
18547#[target_feature(enable = "avx512f")]
18548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18549#[cfg_attr(test, assert_instr(vcompressps))]
18550pub fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k)) }
18552}
18553
18554/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18555///
18556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_ps&expand=1223)
18557#[inline]
18558#[target_feature(enable = "avx512f")]
18559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18560#[cfg_attr(test, assert_instr(vcompressps))]
18561pub fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vcompressps(a.as_f32x16(), f32x16::ZERO, k)) }
18563}
18564
18565/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18566///
18567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_ps&expand=1220)
18568#[inline]
18569#[target_feature(enable = "avx512f,avx512vl")]
18570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18571#[cfg_attr(test, assert_instr(vcompressps))]
18572pub fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vcompressps256(a.as_f32x8(), src.as_f32x8(), k)) }
18574}
18575
18576/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18577///
18578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_ps&expand=1221)
18579#[inline]
18580#[target_feature(enable = "avx512f,avx512vl")]
18581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18582#[cfg_attr(test, assert_instr(vcompressps))]
18583pub fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vcompressps256(a.as_f32x8(), f32x8::ZERO, k)) }
18585}
18586
18587/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18588///
18589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_ps&expand=1218)
18590#[inline]
18591#[target_feature(enable = "avx512f,avx512vl")]
18592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18593#[cfg_attr(test, assert_instr(vcompressps))]
18594pub fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vcompressps128(a.as_f32x4(), src.as_f32x4(), k)) }
18596}
18597
18598/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18599///
18600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_ps&expand=1219)
18601#[inline]
18602#[target_feature(enable = "avx512f,avx512vl")]
18603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18604#[cfg_attr(test, assert_instr(vcompressps))]
18605pub fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vcompressps128(a.as_f32x4(), f32x4::ZERO, k)) }
18607}
18608
18609/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18610///
18611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_pd&expand=1216)
18612#[inline]
18613#[target_feature(enable = "avx512f")]
18614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18615#[cfg_attr(test, assert_instr(vcompresspd))]
18616pub fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k)) }
18618}
18619
18620/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18621///
18622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_pd&expand=1217)
18623#[inline]
18624#[target_feature(enable = "avx512f")]
18625#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18626#[cfg_attr(test, assert_instr(vcompresspd))]
18627pub fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vcompresspd(a.as_f64x8(), f64x8::ZERO, k)) }
18629}
18630
18631/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18632///
18633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_pd&expand=1214)
18634#[inline]
18635#[target_feature(enable = "avx512f,avx512vl")]
18636#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18637#[cfg_attr(test, assert_instr(vcompresspd))]
18638pub fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vcompresspd256(a.as_f64x4(), src.as_f64x4(), k)) }
18640}
18641
18642/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18643///
18644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_pd&expand=1215)
18645#[inline]
18646#[target_feature(enable = "avx512f,avx512vl")]
18647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18648#[cfg_attr(test, assert_instr(vcompresspd))]
18649pub fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vcompresspd256(a.as_f64x4(), f64x4::ZERO, k)) }
18651}
18652
18653/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18654///
18655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_pd&expand=1212)
18656#[inline]
18657#[target_feature(enable = "avx512f,avx512vl")]
18658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18659#[cfg_attr(test, assert_instr(vcompresspd))]
18660pub fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vcompresspd128(a.as_f64x2(), src.as_f64x2(), k)) }
18662}
18663
18664/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18665///
18666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_pd&expand=1213)
18667#[inline]
18668#[target_feature(enable = "avx512f,avx512vl")]
18669#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18670#[cfg_attr(test, assert_instr(vcompresspd))]
18671pub fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vcompresspd128(a.as_f64x2(), f64x2::ZERO, k)) }
18673}
18674
18675/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18676///
18677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi32)
18678#[inline]
18679#[target_feature(enable = "avx512f")]
18680#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18681#[cfg_attr(test, assert_instr(vpcompressd))]
18682pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask16, a: __m512i) {
    vcompressstored(base_addr as *mut _, a.as_i32x16(), k)
18684}
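
// Illustrative sketch, not part of the upstream source: compressstoreu writes only the
// active lanes, contiguously and unaligned, starting at `base_addr`; memory beyond the
// popcount of the mask is left untouched. The helper name `compressstoreu_epi32_example`
// is hypothetical and the code assumes an AVX-512F-capable CPU.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn compressstoreu_epi32_example() {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let mut out = [-1i32; 16];
    // Four mask bits are set (lanes 4..8), so exactly four i32 values are stored.
    _mm512_mask_compressstoreu_epi32(out.as_mut_ptr(), 0b0000_0000_1111_0000, a);
    assert_eq!(out[..4], [4, 5, 6, 7]);
    assert_eq!(out[4..], [-1; 12]);
}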
18685
18686/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18687///
18688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi32)
18689#[inline]
18690#[target_feature(enable = "avx512f,avx512vl")]
18691#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18692#[cfg_attr(test, assert_instr(vpcompressd))]
18693pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m256i) {
    vcompressstored256(base_addr as *mut _, a.as_i32x8(), k)
18695}
18696
18697/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18698///
18699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi32)
18700#[inline]
18701#[target_feature(enable = "avx512f,avx512vl")]
18702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18703#[cfg_attr(test, assert_instr(vpcompressd))]
18704pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m128i) {
    vcompressstored128(base_addr as *mut _, a.as_i32x4(), k)
18706}
18707
18708/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18709///
18710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi64)
18711#[inline]
18712#[target_feature(enable = "avx512f")]
18713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18714#[cfg_attr(test, assert_instr(vpcompressq))]
18715pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m512i) {
    vcompressstoreq(base_addr as *mut _, a.as_i64x8(), k)
18717}
18718
18719/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18720///
18721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi64)
18722#[inline]
18723#[target_feature(enable = "avx512f,avx512vl")]
18724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18725#[cfg_attr(test, assert_instr(vpcompressq))]
18726pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m256i) {
    vcompressstoreq256(base_addr as *mut _, a.as_i64x4(), k)
18728}
18729
18730/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18731///
18732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi64)
18733#[inline]
18734#[target_feature(enable = "avx512f,avx512vl")]
18735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18736#[cfg_attr(test, assert_instr(vpcompressq))]
18737pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m128i) {
    vcompressstoreq128(base_addr as *mut _, a.as_i64x2(), k)
18739}
18740
18741/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18742///
18743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_ps)
18744#[inline]
18745#[target_feature(enable = "avx512f")]
18746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18747#[cfg_attr(test, assert_instr(vcompressps))]
18748pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask16, a: __m512) {
    vcompressstoreps(base_addr as *mut _, a.as_f32x16(), k)
18750}
18751
18752/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18753///
18754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_ps)
18755#[inline]
18756#[target_feature(enable = "avx512f,avx512vl")]
18757#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18758#[cfg_attr(test, assert_instr(vcompressps))]
18759pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m256) {
    vcompressstoreps256(base_addr as *mut _, a.as_f32x8(), k)
18761}
18762
18763/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18764///
18765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_ps)
18766#[inline]
18767#[target_feature(enable = "avx512f,avx512vl")]
18768#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18769#[cfg_attr(test, assert_instr(vcompressps))]
18770pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m128) {
    vcompressstoreps128(base_addr as *mut _, a.as_f32x4(), k)
18772}
18773
18774/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18775///
18776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_pd)
18777#[inline]
18778#[target_feature(enable = "avx512f")]
18779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18780#[cfg_attr(test, assert_instr(vcompresspd))]
18781pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m512d) {
    vcompressstorepd(base_addr as *mut _, a.as_f64x8(), k)
18783}
18784
18785/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18786///
18787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_pd)
18788#[inline]
18789#[target_feature(enable = "avx512f,avx512vl")]
18790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18791#[cfg_attr(test, assert_instr(vcompresspd))]
18792pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m256d) {
    vcompressstorepd256(base_addr as *mut _, a.as_f64x4(), k)
18794}
18795
18796/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18797///
18798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_pd)
18799#[inline]
18800#[target_feature(enable = "avx512f,avx512vl")]
18801#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18802#[cfg_attr(test, assert_instr(vcompresspd))]
18803pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m128d) {
    vcompressstorepd128(base_addr as *mut _, a.as_f64x2(), k)
18805}
18806
18807/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18808///
18809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi32&expand=2316)
18810#[inline]
18811#[target_feature(enable = "avx512f")]
18812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18813#[cfg_attr(test, assert_instr(vpexpandd))]
18814pub fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k)) }
18816}
18817
18818/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18819///
18820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi32&expand=2317)
18821#[inline]
18822#[target_feature(enable = "avx512f")]
18823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18824#[cfg_attr(test, assert_instr(vpexpandd))]
18825pub fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandd(a.as_i32x16(), i32x16::ZERO, k)) }
18827}
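
// Illustrative sketch, not part of the upstream source: expand is the inverse of
// compress. Consecutive low lanes of `a` are scattered into the positions whose mask
// bit is set; the other lanes come from `src` (or are zeroed by the maskz form). The
// helper name `expand_epi32_example` is hypothetical and assumes an AVX-512F CPU.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn expand_epi32_example() {
    let a = _mm512_setr_epi32(10, 11, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    let src = _mm512_set1_epi32(-1);
    // Four mask bits are set, so lanes 0, 2, 4 and 6 receive 10, 11, 12, 13 in order.
    let r = _mm512_mask_expand_epi32(src, 0b0000_0000_0101_0101, a);
    let e = _mm512_setr_epi32(10, -1, 11, -1, 12, -1, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xffff);
}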
18828
18829/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18830///
18831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi32&expand=2314)
18832#[inline]
18833#[target_feature(enable = "avx512f,avx512vl")]
18834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18835#[cfg_attr(test, assert_instr(vpexpandd))]
18836pub fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandd256(a.as_i32x8(), src.as_i32x8(), k)) }
18838}
18839
18840/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18841///
18842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi32&expand=2315)
18843#[inline]
18844#[target_feature(enable = "avx512f,avx512vl")]
18845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18846#[cfg_attr(test, assert_instr(vpexpandd))]
18847pub fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandd256(a.as_i32x8(), i32x8::ZERO, k)) }
18849}
18850
18851/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18852///
18853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi32&expand=2312)
18854#[inline]
18855#[target_feature(enable = "avx512f,avx512vl")]
18856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18857#[cfg_attr(test, assert_instr(vpexpandd))]
18858pub fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandd128(a.as_i32x4(), src.as_i32x4(), k)) }
18860}
18861
18862/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18863///
18864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi32&expand=2313)
18865#[inline]
18866#[target_feature(enable = "avx512f,avx512vl")]
18867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18868#[cfg_attr(test, assert_instr(vpexpandd))]
18869pub fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandd128(a.as_i32x4(), i32x4::ZERO, k)) }
18871}
18872
18873/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18874///
18875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi64&expand=2322)
18876#[inline]
18877#[target_feature(enable = "avx512f")]
18878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18879#[cfg_attr(test, assert_instr(vpexpandq))]
18880pub fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k)) }
18882}
18883
18884/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18885///
18886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi64&expand=2323)
18887#[inline]
18888#[target_feature(enable = "avx512f")]
18889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18890#[cfg_attr(test, assert_instr(vpexpandq))]
18891pub fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandq(a.as_i64x8(), i64x8::ZERO, k)) }
18893}
18894
18895/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18896///
18897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi64&expand=2320)
18898#[inline]
18899#[target_feature(enable = "avx512f,avx512vl")]
18900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18901#[cfg_attr(test, assert_instr(vpexpandq))]
18902pub fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandq256(a.as_i64x4(), src.as_i64x4(), k)) }
18904}
18905
18906/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18907///
18908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi64&expand=2321)
18909#[inline]
18910#[target_feature(enable = "avx512f,avx512vl")]
18911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18912#[cfg_attr(test, assert_instr(vpexpandq))]
18913pub fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandq256(a.as_i64x4(), i64x4::ZERO, k)) }
18915}
18916
18917/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18918///
18919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi64&expand=2318)
18920#[inline]
18921#[target_feature(enable = "avx512f,avx512vl")]
18922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18923#[cfg_attr(test, assert_instr(vpexpandq))]
18924pub fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandq128(a.as_i64x2(), src.as_i64x2(), k)) }
18926}
18927
18928/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18929///
18930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi64&expand=2319)
18931#[inline]
18932#[target_feature(enable = "avx512f,avx512vl")]
18933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18934#[cfg_attr(test, assert_instr(vpexpandq))]
18935pub fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandq128(a.as_i64x2(), i64x2::ZERO, k)) }
18937}
18938
18939/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18940///
18941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_ps&expand=2340)
18942#[inline]
18943#[target_feature(enable = "avx512f")]
18944#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18945#[cfg_attr(test, assert_instr(vexpandps))]
18946pub fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k)) }
18948}
18949
18950/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18951///
18952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_ps&expand=2341)
18953#[inline]
18954#[target_feature(enable = "avx512f")]
18955#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18956#[cfg_attr(test, assert_instr(vexpandps))]
18957pub fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vexpandps(a.as_f32x16(), f32x16::ZERO, k)) }
18959}
18960
18961/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18962///
18963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_ps&expand=2338)
18964#[inline]
18965#[target_feature(enable = "avx512f,avx512vl")]
18966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18967#[cfg_attr(test, assert_instr(vexpandps))]
18968pub fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vexpandps256(a.as_f32x8(), src.as_f32x8(), k)) }
18970}
18971
18972/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18973///
18974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_ps&expand=2339)
18975#[inline]
18976#[target_feature(enable = "avx512f,avx512vl")]
18977#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18978#[cfg_attr(test, assert_instr(vexpandps))]
18979pub fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vexpandps256(a.as_f32x8(), f32x8::ZERO, k)) }
18981}
18982
18983/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18984///
18985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_ps&expand=2336)
18986#[inline]
18987#[target_feature(enable = "avx512f,avx512vl")]
18988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18989#[cfg_attr(test, assert_instr(vexpandps))]
18990pub fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vexpandps128(a.as_f32x4(), src.as_f32x4(), k)) }
18992}
18993
18994/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18995///
18996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_ps&expand=2337)
18997#[inline]
18998#[target_feature(enable = "avx512f,avx512vl")]
18999#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19000#[cfg_attr(test, assert_instr(vexpandps))]
19001pub fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vexpandps128(a.as_f32x4(), f32x4::ZERO, k)) }
19003}
19004
19005/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19006///
19007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_pd&expand=2334)
19008#[inline]
19009#[target_feature(enable = "avx512f")]
19010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19011#[cfg_attr(test, assert_instr(vexpandpd))]
19012pub fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k)) }
19014}
19015
19016/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19017///
19018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_pd&expand=2335)
19019#[inline]
19020#[target_feature(enable = "avx512f")]
19021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19022#[cfg_attr(test, assert_instr(vexpandpd))]
19023pub fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vexpandpd(a.as_f64x8(), f64x8::ZERO, k)) }
19025}
19026
19027/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19028///
19029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_pd&expand=2332)
19030#[inline]
19031#[target_feature(enable = "avx512f,avx512vl")]
19032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19033#[cfg_attr(test, assert_instr(vexpandpd))]
19034pub fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vexpandpd256(a.as_f64x4(), src.as_f64x4(), k)) }
19036}
19037
19038/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19039///
19040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_pd&expand=2333)
19041#[inline]
19042#[target_feature(enable = "avx512f,avx512vl")]
19043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19044#[cfg_attr(test, assert_instr(vexpandpd))]
19045pub fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vexpandpd256(a.as_f64x4(), f64x4::ZERO, k)) }
19047}
19048
19049/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19050///
19051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_pd&expand=2330)
19052#[inline]
19053#[target_feature(enable = "avx512f,avx512vl")]
19054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19055#[cfg_attr(test, assert_instr(vexpandpd))]
19056pub fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vexpandpd128(a.as_f64x2(), src.as_f64x2(), k)) }
19058}
19059
19060/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19061///
19062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_pd&expand=2331)
19063#[inline]
19064#[target_feature(enable = "avx512f,avx512vl")]
19065#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19066#[cfg_attr(test, assert_instr(vexpandpd))]
19067pub fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vexpandpd128(a.as_f64x2(), f64x2::ZERO, k)) }
19069}
19070
19071/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19072///
19073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi32&expand=4685)
19074#[inline]
19075#[target_feature(enable = "avx512f")]
19076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19077#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19078#[rustc_legacy_const_generics(1)]
19079pub fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19080 unsafe {
19081 static_assert_uimm_bits!(IMM8, 8);
19082 let a: i32x16 = a.as_i32x16();
19083 let r: i32x16 = vprold(a, IMM8);
        transmute(r)
19085 }
19086}
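
// Illustrative sketch, not part of the upstream source: a left rotation by IMM8 moves
// the bits shifted out of the top back into the bottom, so 0x8000_0001 rotated left by
// one becomes 0x0000_0003 in every lane. The helper name `rol_epi32_example` is
// hypothetical and assumes an AVX-512F CPU.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn rol_epi32_example() {
    let a = _mm512_set1_epi32(0x8000_0001u32 as i32);
    let r = _mm512_rol_epi32::<1>(a);
    let e = _mm512_set1_epi32(0x0000_0003);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xffff);
}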
19087
19088/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19089///
19090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi32&expand=4683)
19091#[inline]
19092#[target_feature(enable = "avx512f")]
19093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19094#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19095#[rustc_legacy_const_generics(3)]
19096pub fn _mm512_mask_rol_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19097 unsafe {
19098 static_assert_uimm_bits!(IMM8, 8);
19099 let a: i32x16 = a.as_i32x16();
19100 let r: i32x16 = vprold(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
19102 }
19103}
19104
19105/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19106///
19107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi32&expand=4684)
19108#[inline]
19109#[target_feature(enable = "avx512f")]
19110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19111#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19112#[rustc_legacy_const_generics(2)]
19113pub fn _mm512_maskz_rol_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19114 unsafe {
19115 static_assert_uimm_bits!(IMM8, 8);
19116 let a: i32x16 = a.as_i32x16();
19117 let r: i32x16 = vprold(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
19119 }
19120}
19121
19122/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19123///
19124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi32&expand=4682)
19125#[inline]
19126#[target_feature(enable = "avx512f,avx512vl")]
19127#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19128#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19129#[rustc_legacy_const_generics(1)]
19130pub fn _mm256_rol_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19131 unsafe {
19132 static_assert_uimm_bits!(IMM8, 8);
19133 let a: i32x8 = a.as_i32x8();
19134 let r: i32x8 = vprold256(a, IMM8);
        transmute(r)
19136 }
19137}
19138
19139/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19140///
19141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi32&expand=4680)
19142#[inline]
19143#[target_feature(enable = "avx512f,avx512vl")]
19144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19145#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19146#[rustc_legacy_const_generics(3)]
19147pub fn _mm256_mask_rol_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19148 unsafe {
19149 static_assert_uimm_bits!(IMM8, 8);
19150 let a: i32x8 = a.as_i32x8();
19151 let r: i32x8 = vprold256(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
19153 }
19154}
19155
19156/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19157///
19158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi32&expand=4681)
19159#[inline]
19160#[target_feature(enable = "avx512f,avx512vl")]
19161#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19162#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19163#[rustc_legacy_const_generics(2)]
19164pub fn _mm256_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19165 unsafe {
19166 static_assert_uimm_bits!(IMM8, 8);
19167 let a: i32x8 = a.as_i32x8();
19168 let r: i32x8 = vprold256(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
19170 }
19171}
19172
19173/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19174///
19175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi32&expand=4679)
19176#[inline]
19177#[target_feature(enable = "avx512f,avx512vl")]
19178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19179#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19180#[rustc_legacy_const_generics(1)]
19181pub fn _mm_rol_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19182 unsafe {
19183 static_assert_uimm_bits!(IMM8, 8);
19184 let a: i32x4 = a.as_i32x4();
19185 let r: i32x4 = vprold128(a, IMM8);
        transmute(r)
19187 }
19188}
19189
19190/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19191///
19192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi32&expand=4677)
19193#[inline]
19194#[target_feature(enable = "avx512f,avx512vl")]
19195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19196#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19197#[rustc_legacy_const_generics(3)]
19198pub fn _mm_mask_rol_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19199 unsafe {
19200 static_assert_uimm_bits!(IMM8, 8);
19201 let a: i32x4 = a.as_i32x4();
19202 let r: i32x4 = vprold128(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
19204 }
19205}
19206
19207/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19208///
19209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi32&expand=4678)
19210#[inline]
19211#[target_feature(enable = "avx512f,avx512vl")]
19212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19213#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19214#[rustc_legacy_const_generics(2)]
19215pub fn _mm_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19216 unsafe {
19217 static_assert_uimm_bits!(IMM8, 8);
19218 let a: i32x4 = a.as_i32x4();
19219 let r: i32x4 = vprold128(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
19221 }
19222}
19223
19224/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19225///
19226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi32&expand=4721)
19227#[inline]
19228#[target_feature(enable = "avx512f")]
19229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19230#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19231#[rustc_legacy_const_generics(1)]
19232pub fn _mm512_ror_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19233 unsafe {
19234 static_assert_uimm_bits!(IMM8, 8);
19235 let a: i32x16 = a.as_i32x16();
19236 let r: i32x16 = vprord(a, IMM8);
        transmute(r)
19238 }
19239}
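
// Illustrative sketch, not part of the upstream source: a right rotation by IMM8 is
// equivalent to a left rotation by `32 - IMM8`, so rotating 0x0000_0003 right by one
// yields 0x8000_0001, the same result as `_mm512_rol_epi32::<31>`. The helper name
// `ror_epi32_example` is hypothetical and assumes an AVX-512F CPU.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn ror_epi32_example() {
    let a = _mm512_set1_epi32(0x0000_0003);
    let r = _mm512_ror_epi32::<1>(a);
    let e = _mm512_set1_epi32(0x8000_0001u32 as i32);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xffff);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_rol_epi32::<31>(a)), 0xffff);
}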
19240
19241/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19242///
19243/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi32&expand=4719)
19244#[inline]
19245#[target_feature(enable = "avx512f")]
19246#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19247#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19248#[rustc_legacy_const_generics(3)]
19249pub fn _mm512_mask_ror_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19250 unsafe {
19251 static_assert_uimm_bits!(IMM8, 8);
19252 let a: i32x16 = a.as_i32x16();
19253 let r: i32x16 = vprord(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
19255 }
19256}
19257
19258/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19259///
19260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi32&expand=4720)
19261#[inline]
19262#[target_feature(enable = "avx512f")]
19263#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19264#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19265#[rustc_legacy_const_generics(2)]
19266pub fn _mm512_maskz_ror_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19267 unsafe {
19268 static_assert_uimm_bits!(IMM8, 8);
19269 let a: i32x16 = a.as_i32x16();
19270 let r: i32x16 = vprord(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
19272 }
19273}
19274
19275/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19276///
19277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi32&expand=4718)
19278#[inline]
19279#[target_feature(enable = "avx512f,avx512vl")]
19280#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19281#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19282#[rustc_legacy_const_generics(1)]
19283pub fn _mm256_ror_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19284 unsafe {
19285 static_assert_uimm_bits!(IMM8, 8);
19286 let a: i32x8 = a.as_i32x8();
19287 let r: i32x8 = vprord256(a, IMM8);
        transmute(r)
19289 }
19290}
19291
19292/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19293///
19294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi32&expand=4716)
19295#[inline]
19296#[target_feature(enable = "avx512f,avx512vl")]
19297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19298#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19299#[rustc_legacy_const_generics(3)]
19300pub fn _mm256_mask_ror_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19301 unsafe {
19302 static_assert_uimm_bits!(IMM8, 8);
19303 let a: i32x8 = a.as_i32x8();
19304 let r: i32x8 = vprord256(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
19306 }
19307}
19308
19309/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19310///
19311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi32&expand=4717)
19312#[inline]
19313#[target_feature(enable = "avx512f,avx512vl")]
19314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19315#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19316#[rustc_legacy_const_generics(2)]
19317pub fn _mm256_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19318 unsafe {
19319 static_assert_uimm_bits!(IMM8, 8);
19320 let a: i32x8 = a.as_i32x8();
19321 let r: i32x8 = vprord256(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
19323 }
19324}
19325
19326/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19327///
19328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi32&expand=4715)
19329#[inline]
19330#[target_feature(enable = "avx512f,avx512vl")]
19331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19332#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19333#[rustc_legacy_const_generics(1)]
19334pub fn _mm_ror_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19335 unsafe {
19336 static_assert_uimm_bits!(IMM8, 8);
19337 let a: i32x4 = a.as_i32x4();
19338 let r: i32x4 = vprord128(a, IMM8);
        transmute(r)
19340 }
19341}
19342
19343/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19344///
19345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi32&expand=4713)
19346#[inline]
19347#[target_feature(enable = "avx512f,avx512vl")]
19348#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19349#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19350#[rustc_legacy_const_generics(3)]
19351pub fn _mm_mask_ror_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19352 unsafe {
19353 static_assert_uimm_bits!(IMM8, 8);
19354 let a: i32x4 = a.as_i32x4();
19355 let r: i32x4 = vprord128(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
19357 }
19358}
19359
19360/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19361///
19362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi32&expand=4714)
19363#[inline]
19364#[target_feature(enable = "avx512f,avx512vl")]
19365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19366#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19367#[rustc_legacy_const_generics(2)]
19368pub fn _mm_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19369 unsafe {
19370 static_assert_uimm_bits!(IMM8, 8);
19371 let a: i32x4 = a.as_i32x4();
19372 let r: i32x4 = vprord128(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
19374 }
19375}
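
// Sketch of the writemask/zeromask distinction used throughout this file
// (illustrative only): bit i of `k` selects whether lane i receives the computed
// value; unselected lanes either keep `src` (mask_ variants) or become zero
// (maskz_ variants). Hypothetical usage, assuming AVX-512F + AVX-512VL:
//
//     // k = 0b0011: only lanes 0 and 1 are rotated.
//     let kept   = _mm_mask_ror_epi32::<8>(src, 0b0011, a);  // lanes 2,3 copied from `src`
//     let zeroed = _mm_maskz_ror_epi32::<8>(0b0011, a);      // lanes 2,3 are 0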
19376
19377/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19378///
19379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi64&expand=4694)
19380#[inline]
19381#[target_feature(enable = "avx512f")]
19382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19383#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19384#[rustc_legacy_const_generics(1)]
19385pub fn _mm512_rol_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19386 unsafe {
19387 static_assert_uimm_bits!(IMM8, 8);
19388 let a: i64x8 = a.as_i64x8();
19389 let r: i64x8 = vprolq(a, IMM8);
        transmute(r)
19391 }
19392}
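
// Illustrative sketch: a left rotation by one moves the top bit of each 64-bit
// lane into bit 0. Assuming an AVX-512F target, a hypothetical call such as
//
//     let x = _mm512_set1_epi64(0x8000_0000_0000_0001u64 as i64);
//     let r = _mm512_rol_epi64::<1>(x);
//
// leaves every lane of `r` equal to 0x0000_0000_0000_0003.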
19393
19394/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19395///
19396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi64&expand=4692)
19397#[inline]
19398#[target_feature(enable = "avx512f")]
19399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19400#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19401#[rustc_legacy_const_generics(3)]
19402pub fn _mm512_mask_rol_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19403 unsafe {
19404 static_assert_uimm_bits!(IMM8, 8);
19405 let a: i64x8 = a.as_i64x8();
19406 let r: i64x8 = vprolq(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
19408 }
19409}
19410
19411/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19412///
19413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi64&expand=4693)
19414#[inline]
19415#[target_feature(enable = "avx512f")]
19416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19417#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19418#[rustc_legacy_const_generics(2)]
19419pub fn _mm512_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19420 unsafe {
19421 static_assert_uimm_bits!(IMM8, 8);
19422 let a: i64x8 = a.as_i64x8();
19423 let r: i64x8 = vprolq(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
19425 }
19426}
19427
19428/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19429///
19430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi64&expand=4691)
19431#[inline]
19432#[target_feature(enable = "avx512f,avx512vl")]
19433#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19434#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19435#[rustc_legacy_const_generics(1)]
19436pub fn _mm256_rol_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19437 unsafe {
19438 static_assert_uimm_bits!(IMM8, 8);
19439 let a: i64x4 = a.as_i64x4();
19440 let r: i64x4 = vprolq256(a, IMM8);
        transmute(r)
19442 }
19443}
19444
19445/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19446///
19447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi64&expand=4689)
19448#[inline]
19449#[target_feature(enable = "avx512f,avx512vl")]
19450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19451#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19452#[rustc_legacy_const_generics(3)]
19453pub fn _mm256_mask_rol_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19454 unsafe {
19455 static_assert_uimm_bits!(IMM8, 8);
19456 let a: i64x4 = a.as_i64x4();
19457 let r: i64x4 = vprolq256(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
19459 }
19460}
19461
19462/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19463///
19464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi64&expand=4690)
19465#[inline]
19466#[target_feature(enable = "avx512f,avx512vl")]
19467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19468#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19469#[rustc_legacy_const_generics(2)]
19470pub fn _mm256_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19471 unsafe {
19472 static_assert_uimm_bits!(IMM8, 8);
19473 let a: i64x4 = a.as_i64x4();
19474 let r: i64x4 = vprolq256(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
19476 }
19477}
19478
19479/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19480///
19481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi64&expand=4688)
19482#[inline]
19483#[target_feature(enable = "avx512f,avx512vl")]
19484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19485#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19486#[rustc_legacy_const_generics(1)]
19487pub fn _mm_rol_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19488 unsafe {
19489 static_assert_uimm_bits!(IMM8, 8);
19490 let a: i64x2 = a.as_i64x2();
19491 let r: i64x2 = vprolq128(a, IMM8);
        transmute(r)
19493 }
19494}
19495
19496/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19497///
19498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi64&expand=4686)
19499#[inline]
19500#[target_feature(enable = "avx512f,avx512vl")]
19501#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19502#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19503#[rustc_legacy_const_generics(3)]
19504pub fn _mm_mask_rol_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19505 unsafe {
19506 static_assert_uimm_bits!(IMM8, 8);
19507 let a: i64x2 = a.as_i64x2();
19508 let r: i64x2 = vprolq128(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
19510 }
19511}
19512
19513/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19514///
19515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi64&expand=4687)
19516#[inline]
19517#[target_feature(enable = "avx512f,avx512vl")]
19518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19519#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19520#[rustc_legacy_const_generics(2)]
19521pub fn _mm_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19522 unsafe {
19523 static_assert_uimm_bits!(IMM8, 8);
19524 let a: i64x2 = a.as_i64x2();
19525 let r: i64x2 = vprolq128(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
19527 }
19528}
19529
19530/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19531///
19532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi64&expand=4730)
19533#[inline]
19534#[target_feature(enable = "avx512f")]
19535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19536#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19537#[rustc_legacy_const_generics(1)]
19538pub fn _mm512_ror_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19539 unsafe {
19540 static_assert_uimm_bits!(IMM8, 8);
19541 let a: i64x8 = a.as_i64x8();
19542 let r: i64x8 = vprorq(a, IMM8);
        transmute(r)
19544 }
19545}
19546
19547/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19548///
19549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi64&expand=4728)
19550#[inline]
19551#[target_feature(enable = "avx512f")]
19552#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19553#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19554#[rustc_legacy_const_generics(3)]
19555pub fn _mm512_mask_ror_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19556 unsafe {
19557 static_assert_uimm_bits!(IMM8, 8);
19558 let a: i64x8 = a.as_i64x8();
19559 let r: i64x8 = vprorq(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
19561 }
19562}
19563
19564/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19565///
19566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi64&expand=4729)
19567#[inline]
19568#[target_feature(enable = "avx512f")]
19569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19570#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19571#[rustc_legacy_const_generics(2)]
19572pub fn _mm512_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19573 unsafe {
19574 static_assert_uimm_bits!(IMM8, 8);
19575 let a: i64x8 = a.as_i64x8();
19576 let r: i64x8 = vprorq(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
19578 }
19579}
19580
19581/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19582///
19583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi64&expand=4727)
19584#[inline]
19585#[target_feature(enable = "avx512f,avx512vl")]
19586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19587#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19588#[rustc_legacy_const_generics(1)]
19589pub fn _mm256_ror_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19590 unsafe {
19591 static_assert_uimm_bits!(IMM8, 8);
19592 let a: i64x4 = a.as_i64x4();
19593 let r: i64x4 = vprorq256(a, IMM8);
        transmute(r)
19595 }
19596}
19597
19598/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19599///
19600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi64&expand=4725)
19601#[inline]
19602#[target_feature(enable = "avx512f,avx512vl")]
19603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19604#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19605#[rustc_legacy_const_generics(3)]
19606pub fn _mm256_mask_ror_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19607 unsafe {
19608 static_assert_uimm_bits!(IMM8, 8);
19609 let a: i64x4 = a.as_i64x4();
19610 let r: i64x4 = vprorq256(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
19612 }
19613}
19614
19615/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19616///
19617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi64&expand=4726)
19618#[inline]
19619#[target_feature(enable = "avx512f,avx512vl")]
19620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19621#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19622#[rustc_legacy_const_generics(2)]
19623pub fn _mm256_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19624 unsafe {
19625 static_assert_uimm_bits!(IMM8, 8);
19626 let a: i64x4 = a.as_i64x4();
19627 let r: i64x4 = vprorq256(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
19629 }
19630}
19631
19632/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19633///
19634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi64&expand=4724)
19635#[inline]
19636#[target_feature(enable = "avx512f,avx512vl")]
19637#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19638#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19639#[rustc_legacy_const_generics(1)]
19640pub fn _mm_ror_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19641 unsafe {
19642 static_assert_uimm_bits!(IMM8, 8);
19643 let a: i64x2 = a.as_i64x2();
19644 let r: i64x2 = vprorq128(a, IMM8);
        transmute(r)
19646 }
19647}
19648
19649/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19650///
19651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi64&expand=4722)
19652#[inline]
19653#[target_feature(enable = "avx512f,avx512vl")]
19654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19655#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19656#[rustc_legacy_const_generics(3)]
19657pub fn _mm_mask_ror_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19658 unsafe {
19659 static_assert_uimm_bits!(IMM8, 8);
19660 let a: i64x2 = a.as_i64x2();
19661 let r: i64x2 = vprorq128(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
19663 }
19664}
19665
19666/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19667///
19668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi64&expand=4723)
19669#[inline]
19670#[target_feature(enable = "avx512f,avx512vl")]
19671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19672#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19673#[rustc_legacy_const_generics(2)]
19674pub fn _mm_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19675 unsafe {
19676 static_assert_uimm_bits!(IMM8, 8);
19677 let a: i64x2 = a.as_i64x2();
19678 let r: i64x2 = vprorq128(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
19680 }
19681}
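
// Note (informal): for 0 < n < 64, rotating a 64-bit lane right by `n` is the
// same as rotating it left by `64 - n`, so the following two calls are expected
// to produce identical results (sketch, assuming AVX-512F + AVX-512VL):
//
//     let r1 = _mm_ror_epi64::<15>(x);
//     let r2 = _mm_rol_epi64::<49>(x);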
19682
19683/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
19684///
19685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi32&expand=5310)
19686#[inline]
19687#[target_feature(enable = "avx512f")]
19688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19689#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19690#[rustc_legacy_const_generics(1)]
19691pub fn _mm512_slli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
19692 unsafe {
19693 static_assert_uimm_bits!(IMM8, 8);
19694 if IMM8 >= 32 {
19695 _mm512_setzero_si512()
19696 } else {
            transmute(simd_shl(a.as_u32x16(), u32x16::splat(IMM8)))
19698 }
19699 }
19700}
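
// Behavioural sketch: unlike scalar `<<`, an out-of-range immediate does not wrap
// or trap; any count of 32 or more simply yields an all-zero vector, as the
// `IMM8 >= 32` branch above shows. Hypothetical example (AVX-512F assumed):
//
//     let ones = _mm512_set1_epi32(1);
//     let a = _mm512_slli_epi32::<4>(ones);   // every lane is 16
//     let b = _mm512_slli_epi32::<32>(ones);  // every lane is 0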
19701
19702/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19703///
19704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi32&expand=5308)
19705#[inline]
19706#[target_feature(enable = "avx512f")]
19707#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19708#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19709#[rustc_legacy_const_generics(3)]
19710pub fn _mm512_mask_slli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19711 unsafe {
19712 static_assert_uimm_bits!(IMM8, 8);
19713 let shf: u32x16 = if IMM8 >= 32 {
19714 u32x16::ZERO
19715 } else {
            simd_shl(a.as_u32x16(), u32x16::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
19719 }
19720}
19721
19722/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19723///
19724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi32&expand=5309)
19725#[inline]
19726#[target_feature(enable = "avx512f")]
19727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19728#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19729#[rustc_legacy_const_generics(2)]
19730pub fn _mm512_maskz_slli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
19731 unsafe {
19732 static_assert_uimm_bits!(IMM8, 8);
19733 if IMM8 >= 32 {
19734 _mm512_setzero_si512()
19735 } else {
            let shf: u32x16 = simd_shl(a.as_u32x16(), u32x16::splat(IMM8));
            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
19738 }
19739 }
19740}
19741
19742/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19743///
19744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi32&expand=5305)
19745#[inline]
19746#[target_feature(enable = "avx512f,avx512vl")]
19747#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19748#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19749#[rustc_legacy_const_generics(3)]
19750pub fn _mm256_mask_slli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19751 unsafe {
19752 static_assert_uimm_bits!(IMM8, 8);
19753 let r: u32x8 = if IMM8 >= 32 {
19754 u32x8::ZERO
19755 } else {
            simd_shl(a.as_u32x8(), u32x8::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
19759 }
19760}
19761
19762/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19763///
19764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi32&expand=5306)
19765#[inline]
19766#[target_feature(enable = "avx512f,avx512vl")]
19767#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19768#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19769#[rustc_legacy_const_generics(2)]
19770pub fn _mm256_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19771 unsafe {
19772 static_assert_uimm_bits!(IMM8, 8);
19773 if IMM8 >= 32 {
19774 _mm256_setzero_si256()
19775 } else {
            let r: u32x8 = simd_shl(a.as_u32x8(), u32x8::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
19778 }
19779 }
19780}
19781
19782/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19783///
19784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi32&expand=5302)
19785#[inline]
19786#[target_feature(enable = "avx512f,avx512vl")]
19787#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19788#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19789#[rustc_legacy_const_generics(3)]
19790pub fn _mm_mask_slli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19791 unsafe {
19792 static_assert_uimm_bits!(IMM8, 8);
19793 let r: u32x4 = if IMM8 >= 32 {
19794 u32x4::ZERO
19795 } else {
            simd_shl(a.as_u32x4(), u32x4::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
19799 }
19800}
19801
19802/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19803///
19804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi32&expand=5303)
19805#[inline]
19806#[target_feature(enable = "avx512f,avx512vl")]
19807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19808#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19809#[rustc_legacy_const_generics(2)]
19810pub fn _mm_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
19811 unsafe {
19812 static_assert_uimm_bits!(IMM8, 8);
19813 if IMM8 >= 32 {
19814 _mm_setzero_si128()
19815 } else {
            let r: u32x4 = simd_shl(a.as_u32x4(), u32x4::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
19818 }
19819 }
19820}
19821
19822/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
19823///
19824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi32&expand=5522)
19825#[inline]
19826#[target_feature(enable = "avx512f")]
19827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19828#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19829#[rustc_legacy_const_generics(1)]
19830pub fn _mm512_srli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
19831 unsafe {
19832 static_assert_uimm_bits!(IMM8, 8);
19833 if IMM8 >= 32 {
19834 _mm512_setzero_si512()
19835 } else {
            transmute(simd_shr(a.as_u32x16(), u32x16::splat(IMM8)))
19837 }
19838 }
19839}
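
// Sketch: `srli` is a logical shift, so the sign bit is not propagated. Assuming
// AVX-512F, a hypothetical call like
//
//     let x = _mm512_set1_epi32(i32::MIN);    // each lane is 0x8000_0000
//     let r = _mm512_srli_epi32::<1>(x);      // each lane is 0x4000_0000
//
// shows zeros being shifted in from the left; the arithmetic-shift counterparts
// (`srai`) would instead replicate the sign bit.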
19840
19841/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19842///
19843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi32&expand=5520)
19844#[inline]
19845#[target_feature(enable = "avx512f")]
19846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19847#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19848#[rustc_legacy_const_generics(3)]
19849pub fn _mm512_mask_srli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19850 unsafe {
19851 static_assert_uimm_bits!(IMM8, 8);
19852 let shf: u32x16 = if IMM8 >= 32 {
19853 u32x16::ZERO
19854 } else {
            simd_shr(a.as_u32x16(), u32x16::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
19858 }
19859}
19860
19861/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19862///
19863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi32&expand=5521)
19864#[inline]
19865#[target_feature(enable = "avx512f")]
19866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19867#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19868#[rustc_legacy_const_generics(2)]
19869pub fn _mm512_maskz_srli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
19870 unsafe {
19871 static_assert_uimm_bits!(IMM8, 8);
19872 if IMM8 >= 32 {
19873 _mm512_setzero_si512()
19874 } else {
            let shf: u32x16 = simd_shr(a.as_u32x16(), u32x16::splat(IMM8));
            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
19877 }
19878 }
19879}
19880
19881/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19882///
19883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi32&expand=5517)
19884#[inline]
19885#[target_feature(enable = "avx512f,avx512vl")]
19886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19887#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19888#[rustc_legacy_const_generics(3)]
19889pub fn _mm256_mask_srli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19890 unsafe {
19891 static_assert_uimm_bits!(IMM8, 8);
19892 let r: u32x8 = if IMM8 >= 32 {
19893 u32x8::ZERO
19894 } else {
            simd_shr(a.as_u32x8(), u32x8::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
19898 }
19899}
19900
19901/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19902///
19903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi32&expand=5518)
19904#[inline]
19905#[target_feature(enable = "avx512f,avx512vl")]
19906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19907#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19908#[rustc_legacy_const_generics(2)]
19909pub fn _mm256_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19910 unsafe {
19911 static_assert_uimm_bits!(IMM8, 8);
19912 if IMM8 >= 32 {
19913 _mm256_setzero_si256()
19914 } else {
            let r: u32x8 = simd_shr(a.as_u32x8(), u32x8::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
19917 }
19918 }
19919}
19920
19921/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19922///
19923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi32&expand=5514)
19924#[inline]
19925#[target_feature(enable = "avx512f,avx512vl")]
19926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19927#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19928#[rustc_legacy_const_generics(3)]
19929pub fn _mm_mask_srli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19930 unsafe {
19931 static_assert_uimm_bits!(IMM8, 8);
19932 let r: u32x4 = if IMM8 >= 32 {
19933 u32x4::ZERO
19934 } else {
            simd_shr(a.as_u32x4(), u32x4::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
19938 }
19939}
19940
19941/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19942///
19943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi32&expand=5515)
19944#[inline]
19945#[target_feature(enable = "avx512f,avx512vl")]
19946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19947#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19948#[rustc_legacy_const_generics(2)]
19949pub fn _mm_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
19950 unsafe {
19951 static_assert_uimm_bits!(IMM8, 8);
19952 if IMM8 >= 32 {
19953 _mm_setzero_si128()
19954 } else {
            let r: u32x4 = simd_shr(a.as_u32x4(), u32x4::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
19957 }
19958 }
19959}
19960
19961/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
19962///
19963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi64&expand=5319)
19964#[inline]
19965#[target_feature(enable = "avx512f")]
19966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19967#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19968#[rustc_legacy_const_generics(1)]
19969pub fn _mm512_slli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
19970 unsafe {
19971 static_assert_uimm_bits!(IMM8, 8);
19972 if IMM8 >= 64 {
19973 _mm512_setzero_si512()
19974 } else {
            transmute(simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
19976 }
19977 }
19978}
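
// Sketch: the shift count is still an 8-bit immediate, but it is widened with
// `IMM8 as u64` before being splatted across the 64-bit lanes; counts of 64 or
// more give an all-zero result. Hypothetical example (AVX-512F assumed):
//
//     let r = _mm512_slli_epi64::<40>(_mm512_set1_epi64(1));
//     // every 64-bit lane of `r` is 1 << 40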
19979
19980/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19981///
19982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi64&expand=5317)
19983#[inline]
19984#[target_feature(enable = "avx512f")]
19985#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19986#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19987#[rustc_legacy_const_generics(3)]
19988pub fn _mm512_mask_slli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19989 unsafe {
19990 static_assert_uimm_bits!(IMM8, 8);
19991 let shf: u64x8 = if IMM8 >= 64 {
19992 u64x8::ZERO
19993 } else {
            simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
19997 }
19998}
19999
20000/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20001///
20002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi64&expand=5318)
20003#[inline]
20004#[target_feature(enable = "avx512f")]
20005#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20006#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20007#[rustc_legacy_const_generics(2)]
20008pub fn _mm512_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20009 unsafe {
20010 static_assert_uimm_bits!(IMM8, 8);
20011 if IMM8 >= 64 {
20012 _mm512_setzero_si512()
20013 } else {
            let shf: u64x8 = simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
20016 }
20017 }
20018}
20019
20020/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20021///
20022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi64&expand=5314)
20023#[inline]
20024#[target_feature(enable = "avx512f,avx512vl")]
20025#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20026#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20027#[rustc_legacy_const_generics(3)]
20028pub fn _mm256_mask_slli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20029 unsafe {
20030 static_assert_uimm_bits!(IMM8, 8);
20031 let r: u64x4 = if IMM8 >= 64 {
20032 u64x4::ZERO
20033 } else {
            simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
20037 }
20038}
20039
20040/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20041///
20042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi64&expand=5315)
20043#[inline]
20044#[target_feature(enable = "avx512f,avx512vl")]
20045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20046#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20047#[rustc_legacy_const_generics(2)]
20048pub fn _mm256_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20049 unsafe {
20050 static_assert_uimm_bits!(IMM8, 8);
20051 if IMM8 >= 64 {
20052 _mm256_setzero_si256()
20053 } else {
            let r: u64x4 = simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
20056 }
20057 }
20058}
20059
20060/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20061///
20062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi64&expand=5311)
20063#[inline]
20064#[target_feature(enable = "avx512f,avx512vl")]
20065#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20066#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20067#[rustc_legacy_const_generics(3)]
20068pub fn _mm_mask_slli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20069 unsafe {
20070 static_assert_uimm_bits!(IMM8, 8);
20071 let r: u64x2 = if IMM8 >= 64 {
20072 u64x2::ZERO
20073 } else {
            simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
20077 }
20078}
20079
20080/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20081///
20082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi64&expand=5312)
20083#[inline]
20084#[target_feature(enable = "avx512f,avx512vl")]
20085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20086#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20087#[rustc_legacy_const_generics(2)]
20088pub fn _mm_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20089 unsafe {
20090 static_assert_uimm_bits!(IMM8, 8);
20091 if IMM8 >= 64 {
20092 _mm_setzero_si128()
20093 } else {
            let r: u64x2 = simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
20096 }
20097 }
20098}
20099
20100/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
20101///
20102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi64&expand=5531)
20103#[inline]
20104#[target_feature(enable = "avx512f")]
20105#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20106#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20107#[rustc_legacy_const_generics(1)]
20108pub fn _mm512_srli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20109 unsafe {
20110 static_assert_uimm_bits!(IMM8, 8);
20111 if IMM8 >= 64 {
20112 _mm512_setzero_si512()
20113 } else {
            transmute(simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
20115 }
20116 }
20117}
20118
20119/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20120///
20121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi64&expand=5529)
20122#[inline]
20123#[target_feature(enable = "avx512f")]
20124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20125#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20126#[rustc_legacy_const_generics(3)]
20127pub fn _mm512_mask_srli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
20128 unsafe {
20129 static_assert_uimm_bits!(IMM8, 8);
20130 let shf: u64x8 = if IMM8 >= 64 {
20131 u64x8::ZERO
20132 } else {
            simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
20136 }
20137}
20138
20139/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20140///
20141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi64&expand=5530)
20142#[inline]
20143#[target_feature(enable = "avx512f")]
20144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20145#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20146#[rustc_legacy_const_generics(2)]
20147pub fn _mm512_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20148 unsafe {
20149 static_assert_uimm_bits!(IMM8, 8);
20150 if IMM8 >= 64 {
20151 _mm512_setzero_si512()
20152 } else {
            let shf: u64x8 = simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
20155 }
20156 }
20157}
20158
20159/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20160///
20161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi64&expand=5526)
20162#[inline]
20163#[target_feature(enable = "avx512f,avx512vl")]
20164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20165#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20166#[rustc_legacy_const_generics(3)]
20167pub fn _mm256_mask_srli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20168 unsafe {
20169 static_assert_uimm_bits!(IMM8, 8);
20170 let r: u64x4 = if IMM8 >= 64 {
20171 u64x4::ZERO
20172 } else {
            simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
20176 }
20177}
20178
20179/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20180///
20181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi64&expand=5527)
20182#[inline]
20183#[target_feature(enable = "avx512f,avx512vl")]
20184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20185#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20186#[rustc_legacy_const_generics(2)]
20187pub fn _mm256_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20188 unsafe {
20189 static_assert_uimm_bits!(IMM8, 8);
20190 if IMM8 >= 64 {
20191 _mm256_setzero_si256()
20192 } else {
            let r: u64x4 = simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
20195 }
20196 }
20197}
20198
20199/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20200///
20201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi64&expand=5523)
20202#[inline]
20203#[target_feature(enable = "avx512f,avx512vl")]
20204#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20205#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20206#[rustc_legacy_const_generics(3)]
20207pub fn _mm_mask_srli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20208 unsafe {
20209 static_assert_uimm_bits!(IMM8, 8);
20210 let r: u64x2 = if IMM8 >= 64 {
20211 u64x2::ZERO
20212 } else {
            simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
20216 }
20217}
20218
20219/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20220///
20221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi64&expand=5524)
20222#[inline]
20223#[target_feature(enable = "avx512f,avx512vl")]
20224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20225#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20226#[rustc_legacy_const_generics(2)]
20227pub fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20228 unsafe {
20229 static_assert_uimm_bits!(IMM8, 8);
20230 if IMM8 >= 64 {
20231 _mm_setzero_si128()
20232 } else {
            let r: u64x2 = simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
20235 }
20236 }
20237}
20238
20239/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
20240///
20241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi32&expand=5280)
20242#[inline]
20243#[target_feature(enable = "avx512f")]
20244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20245#[cfg_attr(test, assert_instr(vpslld))]
20246pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) }
20248}
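
// Sketch of the vector-count form: the shift amount comes from the low 64 bits of
// `count`, and it applies to every lane; if that value exceeds 31, all lanes
// become zero. Hypothetical usage (AVX-512F assumed; `_mm_cvtsi32_si128` just
// places the count in the low element):
//
//     let a = _mm512_set1_epi32(3);
//     let r = _mm512_sll_epi32(a, _mm_cvtsi32_si128(4));   // every lane is 48
//     let z = _mm512_sll_epi32(a, _mm_cvtsi32_si128(40));  // every lane is 0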
20249
20250/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20251///
20252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi32&expand=5278)
20253#[inline]
20254#[target_feature(enable = "avx512f")]
20255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20256#[cfg_attr(test, assert_instr(vpslld))]
20257pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20258 unsafe {
20259 let shf: i32x16 = _mm512_sll_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20261 }
20262}
20263
20264/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20265///
20266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi32&expand=5279)
20267#[inline]
20268#[target_feature(enable = "avx512f")]
20269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20270#[cfg_attr(test, assert_instr(vpslld))]
20271pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20272 unsafe {
20273 let shf: i32x16 = _mm512_sll_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20275 }
20276}
20277
20278/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20279///
20280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi32&expand=5275)
20281#[inline]
20282#[target_feature(enable = "avx512f,avx512vl")]
20283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20284#[cfg_attr(test, assert_instr(vpslld))]
20285pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20286 unsafe {
20287 let shf: i32x8 = _mm256_sll_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20289 }
20290}
20291
20292/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20293///
20294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi32&expand=5276)
20295#[inline]
20296#[target_feature(enable = "avx512f,avx512vl")]
20297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20298#[cfg_attr(test, assert_instr(vpslld))]
20299pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20300 unsafe {
20301 let shf: i32x8 = _mm256_sll_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20303 }
20304}
20305
20306/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20307///
20308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi32&expand=5272)
20309#[inline]
20310#[target_feature(enable = "avx512f,avx512vl")]
20311#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20312#[cfg_attr(test, assert_instr(vpslld))]
20313pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20314 unsafe {
20315 let shf: i32x4 = _mm_sll_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20317 }
20318}
20319
20320/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20321///
20322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi32&expand=5273)
20323#[inline]
20324#[target_feature(enable = "avx512f,avx512vl")]
20325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20326#[cfg_attr(test, assert_instr(vpslld))]
20327pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20328 unsafe {
20329 let shf: i32x4 = _mm_sll_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20331 }
20332}
20333
20334/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
20335///
20336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi32&expand=5492)
20337#[inline]
20338#[target_feature(enable = "avx512f")]
20339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20340#[cfg_attr(test, assert_instr(vpsrld))]
20341pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) }
20343}
20344
20345/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20346///
20347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi32&expand=5490)
20348#[inline]
20349#[target_feature(enable = "avx512f")]
20350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20351#[cfg_attr(test, assert_instr(vpsrld))]
20352pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20353 unsafe {
20354 let shf: i32x16 = _mm512_srl_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20356 }
20357}
20358
20359/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20360///
20361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi32&expand=5491)
20362#[inline]
20363#[target_feature(enable = "avx512f")]
20364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20365#[cfg_attr(test, assert_instr(vpsrld))]
20366pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20367 unsafe {
20368 let shf: i32x16 = _mm512_srl_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20370 }
20371}
20372
20373/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20374///
20375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi32&expand=5487)
20376#[inline]
20377#[target_feature(enable = "avx512f,avx512vl")]
20378#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20379#[cfg_attr(test, assert_instr(vpsrld))]
20380pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20381 unsafe {
20382 let shf: i32x8 = _mm256_srl_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20384 }
20385}
20386
20387/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20388///
20389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi32&expand=5488)
20390#[inline]
20391#[target_feature(enable = "avx512f,avx512vl")]
20392#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20393#[cfg_attr(test, assert_instr(vpsrld))]
20394pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20395 unsafe {
20396 let shf: i32x8 = _mm256_srl_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20398 }
20399}
20400
20401/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20402///
20403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi32&expand=5484)
20404#[inline]
20405#[target_feature(enable = "avx512f,avx512vl")]
20406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20407#[cfg_attr(test, assert_instr(vpsrld))]
20408pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20409 unsafe {
20410 let shf: i32x4 = _mm_srl_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20412 }
20413}
20414
20415/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20416///
20417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi32&expand=5485)
20418#[inline]
20419#[target_feature(enable = "avx512f,avx512vl")]
20420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20421#[cfg_attr(test, assert_instr(vpsrld))]
20422pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20423 unsafe {
20424 let shf: i32x4 = _mm_srl_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20426 }
20427}
20428
20429/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
20430///
20431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi64&expand=5289)
20432#[inline]
20433#[target_feature(enable = "avx512f")]
20434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20435#[cfg_attr(test, assert_instr(vpsllq))]
20436pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) }
20438}
20439
20440/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20441///
20442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi64&expand=5287)
20443#[inline]
20444#[target_feature(enable = "avx512f")]
20445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20446#[cfg_attr(test, assert_instr(vpsllq))]
20447pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20448 unsafe {
20449 let shf: i64x8 = _mm512_sll_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20451 }
20452}
20453
20454/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20455///
20456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi64&expand=5288)
20457#[inline]
20458#[target_feature(enable = "avx512f")]
20459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20460#[cfg_attr(test, assert_instr(vpsllq))]
20461pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20462 unsafe {
20463 let shf: i64x8 = _mm512_sll_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20465 }
20466}
20467
20468/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20469///
20470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi64&expand=5284)
20471#[inline]
20472#[target_feature(enable = "avx512f,avx512vl")]
20473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20474#[cfg_attr(test, assert_instr(vpsllq))]
20475pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20476 unsafe {
20477 let shf: i64x4 = _mm256_sll_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20479 }
20480}
20481
20482/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20483///
20484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi64&expand=5285)
20485#[inline]
20486#[target_feature(enable = "avx512f,avx512vl")]
20487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20488#[cfg_attr(test, assert_instr(vpsllq))]
20489pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20490 unsafe {
20491 let shf: i64x4 = _mm256_sll_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20493 }
20494}
20495
20496/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20497///
20498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi64&expand=5281)
20499#[inline]
20500#[target_feature(enable = "avx512f,avx512vl")]
20501#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20502#[cfg_attr(test, assert_instr(vpsllq))]
20503pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20504 unsafe {
20505 let shf: i64x2 = _mm_sll_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20507 }
20508}
20509
20510/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20511///
20512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi64&expand=5282)
20513#[inline]
20514#[target_feature(enable = "avx512f,avx512vl")]
20515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20516#[cfg_attr(test, assert_instr(vpsllq))]
20517pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20518 unsafe {
20519 let shf: i64x2 = _mm_sll_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20521 }
20522}
20523
20524/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
20525///
20526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi64&expand=5501)
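///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available):
///
/// ```ignore
/// let a = _mm512_set1_epi64(-1); // all bits set
/// let count = _mm_set_epi64x(0, 63);
/// let r = _mm512_srl_epi64(a, count); // zeros shift in, so every lane becomes 1
/// ```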
20527#[inline]
20528#[target_feature(enable = "avx512f")]
20529#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20530#[cfg_attr(test, assert_instr(vpsrlq))]
20531pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) }
20533}
20534
20535/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20536///
20537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi64&expand=5499)
20538#[inline]
20539#[target_feature(enable = "avx512f")]
20540#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20541#[cfg_attr(test, assert_instr(vpsrlq))]
20542pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20543 unsafe {
20544 let shf: i64x8 = _mm512_srl_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20546 }
20547}
20548
20549/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20550///
20551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi64&expand=5500)
20552#[inline]
20553#[target_feature(enable = "avx512f")]
20554#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20555#[cfg_attr(test, assert_instr(vpsrlq))]
20556pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20557 unsafe {
20558 let shf: i64x8 = _mm512_srl_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20560 }
20561}
20562
20563/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20564///
20565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi64&expand=5496)
20566#[inline]
20567#[target_feature(enable = "avx512f,avx512vl")]
20568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20569#[cfg_attr(test, assert_instr(vpsrlq))]
20570pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20571 unsafe {
20572 let shf: i64x4 = _mm256_srl_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20574 }
20575}
20576
20577/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20578///
20579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi64&expand=5497)
20580#[inline]
20581#[target_feature(enable = "avx512f,avx512vl")]
20582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20583#[cfg_attr(test, assert_instr(vpsrlq))]
20584pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20585 unsafe {
20586 let shf: i64x4 = _mm256_srl_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20588 }
20589}
20590
20591/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20592///
20593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi64&expand=5493)
20594#[inline]
20595#[target_feature(enable = "avx512f,avx512vl")]
20596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20597#[cfg_attr(test, assert_instr(vpsrlq))]
20598pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20599 unsafe {
20600 let shf: i64x2 = _mm_srl_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20602 }
20603}
20604
20605/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20606///
20607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi64&expand=5494)
20608#[inline]
20609#[target_feature(enable = "avx512f,avx512vl")]
20610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20611#[cfg_attr(test, assert_instr(vpsrlq))]
20612pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20613 unsafe {
20614 let shf: i64x2 = _mm_srl_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20616 }
20617}
20618
20619/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20620///
20621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi32&expand=5407)
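///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available):
///
/// ```ignore
/// let a = _mm512_set1_epi32(-16);
/// let count = _mm_set_epi64x(0, 2);
/// let r = _mm512_sra_epi32(a, count); // sign bits shift in, so every lane becomes -4
/// ```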
20622#[inline]
20623#[target_feature(enable = "avx512f")]
20624#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20625#[cfg_attr(test, assert_instr(vpsrad))]
20626pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) }
20628}
20629
20630/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20631///
20632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi32&expand=5405)
20633#[inline]
20634#[target_feature(enable = "avx512f")]
20635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20636#[cfg_attr(test, assert_instr(vpsrad))]
20637pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20638 unsafe {
20639 let shf: i32x16 = _mm512_sra_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20641 }
20642}
20643
20644/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20645///
20646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi32&expand=5406)
20647#[inline]
20648#[target_feature(enable = "avx512f")]
20649#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20650#[cfg_attr(test, assert_instr(vpsrad))]
20651pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20652 unsafe {
20653 let shf: i32x16 = _mm512_sra_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20655 }
20656}
20657
20658/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20659///
20660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi32&expand=5402)
20661#[inline]
20662#[target_feature(enable = "avx512f,avx512vl")]
20663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20664#[cfg_attr(test, assert_instr(vpsrad))]
20665pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20666 unsafe {
20667 let shf: i32x8 = _mm256_sra_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20669 }
20670}
20671
20672/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20673///
20674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi32&expand=5403)
20675#[inline]
20676#[target_feature(enable = "avx512f,avx512vl")]
20677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20678#[cfg_attr(test, assert_instr(vpsrad))]
20679pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20680 unsafe {
20681 let shf: i32x8 = _mm256_sra_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20683 }
20684}
20685
20686/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20687///
20688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi32&expand=5399)
20689#[inline]
20690#[target_feature(enable = "avx512f,avx512vl")]
20691#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20692#[cfg_attr(test, assert_instr(vpsrad))]
20693pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20694 unsafe {
20695 let shf: i32x4 = _mm_sra_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20697 }
20698}
20699
20700/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20701///
20702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi32&expand=5400)
20703#[inline]
20704#[target_feature(enable = "avx512f,avx512vl")]
20705#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20706#[cfg_attr(test, assert_instr(vpsrad))]
20707pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20708 unsafe {
20709 let shf: i32x4 = _mm_sra_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20711 }
20712}
20713
20714/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20715///
20716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi64&expand=5416)
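///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available):
///
/// ```ignore
/// let a = _mm512_set1_epi64(i64::MIN);
/// let count = _mm_set_epi64x(0, 62);
/// let r = _mm512_sra_epi64(a, count); // sign bits shift in, so every lane becomes -2
/// ```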
20717#[inline]
20718#[target_feature(enable = "avx512f")]
20719#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20720#[cfg_attr(test, assert_instr(vpsraq))]
20721pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) }
20723}
20724
20725/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20726///
20727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi64&expand=5414)
20728#[inline]
20729#[target_feature(enable = "avx512f")]
20730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20731#[cfg_attr(test, assert_instr(vpsraq))]
20732pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20733 unsafe {
20734 let shf: i64x8 = _mm512_sra_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20736 }
20737}
20738
20739/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20740///
20741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi64&expand=5415)
20742#[inline]
20743#[target_feature(enable = "avx512f")]
20744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20745#[cfg_attr(test, assert_instr(vpsraq))]
20746pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20747 unsafe {
20748 let shf: i64x8 = _mm512_sra_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20750 }
20751}
20752
20753/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20754///
20755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi64&expand=5413)
20756#[inline]
20757#[target_feature(enable = "avx512f,avx512vl")]
20758#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20759#[cfg_attr(test, assert_instr(vpsraq))]
20760pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(vpsraq256(a.as_i64x4(), count.as_i64x2())) }
20762}
20763
20764/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20765///
20766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi64&expand=5411)
20767#[inline]
20768#[target_feature(enable = "avx512f,avx512vl")]
20769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20770#[cfg_attr(test, assert_instr(vpsraq))]
20771pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20772 unsafe {
20773 let shf: i64x4 = _mm256_sra_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20775 }
20776}
20777
20778/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20779///
20780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi64&expand=5412)
20781#[inline]
20782#[target_feature(enable = "avx512f,avx512vl")]
20783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20784#[cfg_attr(test, assert_instr(vpsraq))]
20785pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20786 unsafe {
20787 let shf: i64x4 = _mm256_sra_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20789 }
20790}
20791
20792/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20793///
20794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi64&expand=5410)
20795#[inline]
20796#[target_feature(enable = "avx512f,avx512vl")]
20797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20798#[cfg_attr(test, assert_instr(vpsraq))]
20799pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(vpsraq128(a.as_i64x2(), count.as_i64x2())) }
20801}
20802
20803/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20804///
20805/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi64&expand=5408)
20806#[inline]
20807#[target_feature(enable = "avx512f,avx512vl")]
20808#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20809#[cfg_attr(test, assert_instr(vpsraq))]
20810pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20811 unsafe {
20812 let shf: i64x2 = _mm_sra_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20814 }
20815}
20816
20817/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20818///
20819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi64&expand=5409)
20820#[inline]
20821#[target_feature(enable = "avx512f,avx512vl")]
20822#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20823#[cfg_attr(test, assert_instr(vpsraq))]
20824pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20825 unsafe {
20826 let shf: i64x2 = _mm_sra_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20828 }
20829}
20830
20831/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20832///
20833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi32&expand=5436)
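///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available):
///
/// ```ignore
/// let a = _mm512_set1_epi32(-256);
/// let r = _mm512_srai_epi32::<4>(a); // every lane becomes -256 >> 4 == -16
/// ```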
20834#[inline]
20835#[target_feature(enable = "avx512f")]
20836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20837#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20838#[rustc_legacy_const_generics(1)]
20839pub fn _mm512_srai_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
20840 unsafe {
20841 static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)))
20843 }
20844}
20845
20846/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20847///
20848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi32&expand=5434)
20849#[inline]
20850#[target_feature(enable = "avx512f")]
20851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20852#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20853#[rustc_legacy_const_generics(3)]
20854pub fn _mm512_mask_srai_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
20855 unsafe {
20856 static_assert_uimm_bits!(IMM8, 8);
        let r: i32x16 = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
20859 }
20860}
20861
20862/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20863///
20864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi32&expand=5435)
20865#[inline]
20866#[target_feature(enable = "avx512f")]
20867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20868#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20869#[rustc_legacy_const_generics(2)]
20870pub fn _mm512_maskz_srai_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
20871 unsafe {
20872 static_assert_uimm_bits!(IMM8, 8);
        let r: i32x16 = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
20875 }
20876}
20877
20878/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20879///
20880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi32&expand=5431)
20881#[inline]
20882#[target_feature(enable = "avx512f,avx512vl")]
20883#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20884#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20885#[rustc_legacy_const_generics(3)]
20886pub fn _mm256_mask_srai_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20887 unsafe {
        let r: i32x8 = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
20890 }
20891}
20892
20893/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20894///
20895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi32&expand=5432)
20896#[inline]
20897#[target_feature(enable = "avx512f,avx512vl")]
20898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20899#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20900#[rustc_legacy_const_generics(2)]
20901pub fn _mm256_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20902 unsafe {
        let r: i32x8 = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
20905 }
20906}
20907
20908/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20909///
20910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi32&expand=5428)
20911#[inline]
20912#[target_feature(enable = "avx512f,avx512vl")]
20913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20914#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20915#[rustc_legacy_const_generics(3)]
20916pub fn _mm_mask_srai_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20917 unsafe {
        let r: i32x4 = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
20920 }
20921}
20922
20923/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20924///
20925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi32&expand=5429)
20926#[inline]
20927#[target_feature(enable = "avx512f,avx512vl")]
20928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20929#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20930#[rustc_legacy_const_generics(2)]
20931pub fn _mm_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20932 unsafe {
        let r: i32x4 = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
20935 }
20936}
20937
20938/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20939///
20940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi64&expand=5445)
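///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available):
///
/// ```ignore
/// let a = _mm512_set1_epi64(-1024);
/// let r = _mm512_srai_epi64::<10>(a); // every lane becomes -1024 >> 10 == -1
/// ```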
20941#[inline]
20942#[target_feature(enable = "avx512f")]
20943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20944#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20945#[rustc_legacy_const_generics(1)]
20946pub fn _mm512_srai_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20947 unsafe {
20948 static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)))
20950 }
20951}
20952
20953/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20954///
20955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi64&expand=5443)
20956#[inline]
20957#[target_feature(enable = "avx512f")]
20958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20959#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20960#[rustc_legacy_const_generics(3)]
20961pub fn _mm512_mask_srai_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
20962 unsafe {
20963 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x8 = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20966 }
20967}
20968
20969/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20970///
20971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi64&expand=5444)
20972#[inline]
20973#[target_feature(enable = "avx512f")]
20974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20975#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20976#[rustc_legacy_const_generics(2)]
20977pub fn _mm512_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20978 unsafe {
20979 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x8 = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20982 }
20983}
20984
20985/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20986///
20987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi64&expand=5442)
20988#[inline]
20989#[target_feature(enable = "avx512f,avx512vl")]
20990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20991#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20992#[rustc_legacy_const_generics(1)]
20993pub fn _mm256_srai_epi64<const IMM8: u32>(a: __m256i) -> __m256i {
20994 unsafe {
20995 static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)))
20997 }
20998}
20999
21000/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21001///
21002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi64&expand=5440)
21003#[inline]
21004#[target_feature(enable = "avx512f,avx512vl")]
21005#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21006#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21007#[rustc_legacy_const_generics(3)]
21008pub fn _mm256_mask_srai_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
21009 unsafe {
21010 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x4 = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21013 }
21014}
21015
21016/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21017///
21018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi64&expand=5441)
21019#[inline]
21020#[target_feature(enable = "avx512f,avx512vl")]
21021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21022#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21023#[rustc_legacy_const_generics(2)]
21024pub fn _mm256_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
21025 unsafe {
21026 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x4 = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21029 }
21030}
21031
21032/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
21033///
21034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi64&expand=5439)
21035#[inline]
21036#[target_feature(enable = "avx512f,avx512vl")]
21037#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21038#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21039#[rustc_legacy_const_generics(1)]
21040pub fn _mm_srai_epi64<const IMM8: u32>(a: __m128i) -> __m128i {
21041 unsafe {
21042 static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)))
21044 }
21045}
21046
21047/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21048///
21049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi64&expand=5437)
21050#[inline]
21051#[target_feature(enable = "avx512f,avx512vl")]
21052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21053#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21054#[rustc_legacy_const_generics(3)]
21055pub fn _mm_mask_srai_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
21056 unsafe {
21057 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x2 = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21060 }
21061}
21062
21063/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21064///
21065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi64&expand=5438)
21066#[inline]
21067#[target_feature(enable = "avx512f,avx512vl")]
21068#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21069#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21070#[rustc_legacy_const_generics(2)]
21071pub fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
21072 unsafe {
21073 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x2 = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21076 }
21077}
21078
21079/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21080///
21081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi32&expand=5465)
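///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available):
///
/// ```ignore
/// let a = _mm512_set1_epi32(-64);
/// let counts = _mm512_set1_epi32(3);
/// let r = _mm512_srav_epi32(a, counts); // each lane uses its own count: -64 >> 3 == -8
/// ```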
21082#[inline]
21083#[target_feature(enable = "avx512f")]
21084#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21085#[cfg_attr(test, assert_instr(vpsravd))]
21086pub fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) }
21088}
21089
21090/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21091///
21092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi32&expand=5463)
21093#[inline]
21094#[target_feature(enable = "avx512f")]
21095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21096#[cfg_attr(test, assert_instr(vpsravd))]
21097pub fn _mm512_mask_srav_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21098 unsafe {
21099 let shf: i32x16 = _mm512_srav_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21101 }
21102}
21103
21104/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21105///
21106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi32&expand=5464)
21107#[inline]
21108#[target_feature(enable = "avx512f")]
21109#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21110#[cfg_attr(test, assert_instr(vpsravd))]
21111pub fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21112 unsafe {
21113 let shf: i32x16 = _mm512_srav_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21115 }
21116}
21117
21118/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21119///
21120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi32&expand=5460)
21121#[inline]
21122#[target_feature(enable = "avx512f,avx512vl")]
21123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21124#[cfg_attr(test, assert_instr(vpsravd))]
21125pub fn _mm256_mask_srav_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21126 unsafe {
21127 let shf: i32x8 = _mm256_srav_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21129 }
21130}
21131
21132/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21133///
21134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi32&expand=5461)
21135#[inline]
21136#[target_feature(enable = "avx512f,avx512vl")]
21137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21138#[cfg_attr(test, assert_instr(vpsravd))]
21139pub fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21140 unsafe {
21141 let shf: i32x8 = _mm256_srav_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21143 }
21144}
21145
21146/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21147///
21148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi32&expand=5457)
21149#[inline]
21150#[target_feature(enable = "avx512f,avx512vl")]
21151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21152#[cfg_attr(test, assert_instr(vpsravd))]
21153pub fn _mm_mask_srav_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21154 unsafe {
21155 let shf: i32x4 = _mm_srav_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21157 }
21158}
21159
21160/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21161///
21162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi32&expand=5458)
21163#[inline]
21164#[target_feature(enable = "avx512f,avx512vl")]
21165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21166#[cfg_attr(test, assert_instr(vpsravd))]
21167pub fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21168 unsafe {
21169 let shf: i32x4 = _mm_srav_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21171 }
21172}
21173
21174/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21175///
21176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi64&expand=5474)
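///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available):
///
/// ```ignore
/// let a = _mm512_set1_epi64(-64);
/// let counts = _mm512_set1_epi64(5);
/// let r = _mm512_srav_epi64(a, counts); // every lane becomes -64 >> 5 == -2
/// ```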
21177#[inline]
21178#[target_feature(enable = "avx512f")]
21179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21180#[cfg_attr(test, assert_instr(vpsravq))]
21181pub fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) }
21183}
21184
21185/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21186///
21187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi64&expand=5472)
21188#[inline]
21189#[target_feature(enable = "avx512f")]
21190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21191#[cfg_attr(test, assert_instr(vpsravq))]
21192pub fn _mm512_mask_srav_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21193 unsafe {
21194 let shf: i64x8 = _mm512_srav_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21196 }
21197}
21198
21199/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21200///
21201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi64&expand=5473)
21202#[inline]
21203#[target_feature(enable = "avx512f")]
21204#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21205#[cfg_attr(test, assert_instr(vpsravq))]
21206pub fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21207 unsafe {
21208 let shf: i64x8 = _mm512_srav_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21210 }
21211}
21212
21213/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21214///
21215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi64&expand=5471)
21216#[inline]
21217#[target_feature(enable = "avx512f,avx512vl")]
21218#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21219#[cfg_attr(test, assert_instr(vpsravq))]
21220pub fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
    unsafe { transmute(vpsravq256(a.as_i64x4(), count.as_i64x4())) }
21222}
21223
21224/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21225///
21226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi64&expand=5469)
21227#[inline]
21228#[target_feature(enable = "avx512f,avx512vl")]
21229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21230#[cfg_attr(test, assert_instr(vpsravq))]
21231pub fn _mm256_mask_srav_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21232 unsafe {
21233 let shf: i64x4 = _mm256_srav_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21235 }
21236}
21237
21238/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21239///
21240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi64&expand=5470)
21241#[inline]
21242#[target_feature(enable = "avx512f,avx512vl")]
21243#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21244#[cfg_attr(test, assert_instr(vpsravq))]
21245pub fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21246 unsafe {
21247 let shf: i64x4 = _mm256_srav_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21249 }
21250}
21251
21252/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21253///
21254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi64&expand=5468)
21255#[inline]
21256#[target_feature(enable = "avx512f,avx512vl")]
21257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21258#[cfg_attr(test, assert_instr(vpsravq))]
21259pub fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(vpsravq128(a.as_i64x2(), count.as_i64x2())) }
21261}
21262
21263/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21264///
21265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi64&expand=5466)
21266#[inline]
21267#[target_feature(enable = "avx512f,avx512vl")]
21268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21269#[cfg_attr(test, assert_instr(vpsravq))]
21270pub fn _mm_mask_srav_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21271 unsafe {
21272 let shf: i64x2 = _mm_srav_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21274 }
21275}
21276
21277/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21278///
21279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi64&expand=5467)
21280#[inline]
21281#[target_feature(enable = "avx512f,avx512vl")]
21282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21283#[cfg_attr(test, assert_instr(vpsravq))]
21284pub fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21285 unsafe {
21286 let shf: i64x2 = _mm_srav_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21288 }
21289}
21290
21291/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21292///
21293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi32&expand=4703)
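///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available):
///
/// ```ignore
/// let a = _mm512_set1_epi32(0x8000_0001u32 as i32);
/// let b = _mm512_set1_epi32(1);
/// let r = _mm512_rolv_epi32(a, b); // bit 31 wraps around: every lane becomes 0x0000_0003
/// ```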
21294#[inline]
21295#[target_feature(enable = "avx512f")]
21296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21297#[cfg_attr(test, assert_instr(vprolvd))]
21298pub fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) }
21300}
21301
21302/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21303///
21304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi32&expand=4701)
21305#[inline]
21306#[target_feature(enable = "avx512f")]
21307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21308#[cfg_attr(test, assert_instr(vprolvd))]
21309pub fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21310 unsafe {
21311 let rol: i32x16 = _mm512_rolv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
21313 }
21314}
21315
21316/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21317///
21318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi32&expand=4702)
21319#[inline]
21320#[target_feature(enable = "avx512f")]
21321#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21322#[cfg_attr(test, assert_instr(vprolvd))]
21323pub fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21324 unsafe {
21325 let rol: i32x16 = _mm512_rolv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, rol, i32x16::ZERO))
21327 }
21328}
21329
21330/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21331///
21332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi32&expand=4700)
21333#[inline]
21334#[target_feature(enable = "avx512f,avx512vl")]
21335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21336#[cfg_attr(test, assert_instr(vprolvd))]
21337pub fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vprolvd256(a.as_i32x8(), b.as_i32x8())) }
21339}
21340
21341/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21342///
21343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi32&expand=4698)
21344#[inline]
21345#[target_feature(enable = "avx512f,avx512vl")]
21346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21347#[cfg_attr(test, assert_instr(vprolvd))]
21348pub fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21349 unsafe {
21350 let rol: i32x8 = _mm256_rolv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, rol, src.as_i32x8()))
21352 }
21353}
21354
21355/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21356///
21357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi32&expand=4699)
21358#[inline]
21359#[target_feature(enable = "avx512f,avx512vl")]
21360#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21361#[cfg_attr(test, assert_instr(vprolvd))]
21362pub fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21363 unsafe {
21364 let rol: i32x8 = _mm256_rolv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, rol, i32x8::ZERO))
21366 }
21367}
21368
21369/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21370///
21371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi32&expand=4697)
21372#[inline]
21373#[target_feature(enable = "avx512f,avx512vl")]
21374#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21375#[cfg_attr(test, assert_instr(vprolvd))]
21376pub fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vprolvd128(a.as_i32x4(), b.as_i32x4())) }
21378}
21379
21380/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21381///
21382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi32&expand=4695)
21383#[inline]
21384#[target_feature(enable = "avx512f,avx512vl")]
21385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21386#[cfg_attr(test, assert_instr(vprolvd))]
21387pub fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21388 unsafe {
21389 let rol: i32x4 = _mm_rolv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, rol, src.as_i32x4()))
21391 }
21392}
21393
21394/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21395///
21396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi32&expand=4696)
21397#[inline]
21398#[target_feature(enable = "avx512f,avx512vl")]
21399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21400#[cfg_attr(test, assert_instr(vprolvd))]
21401pub fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21402 unsafe {
21403 let rol: i32x4 = _mm_rolv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, rol, i32x4::ZERO))
21405 }
21406}
21407
21408/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21409///
21410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi32&expand=4739)
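///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available):
///
/// ```ignore
/// let a = _mm512_set1_epi32(0b0101);
/// let b = _mm512_set1_epi32(2);
/// let r = _mm512_rorv_epi32(a, b); // the low bits wrap to the top: every lane becomes 0x4000_0001
/// ```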
21411#[inline]
21412#[target_feature(enable = "avx512f")]
21413#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21414#[cfg_attr(test, assert_instr(vprorvd))]
21415pub fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) }
21417}
21418
21419/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21420///
21421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi32&expand=4737)
21422#[inline]
21423#[target_feature(enable = "avx512f")]
21424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21425#[cfg_attr(test, assert_instr(vprorvd))]
21426pub fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21427 unsafe {
21428 let ror: i32x16 = _mm512_rorv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
21430 }
21431}
21432
21433/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21434///
21435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi32&expand=4738)
21436#[inline]
21437#[target_feature(enable = "avx512f")]
21438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21439#[cfg_attr(test, assert_instr(vprorvd))]
21440pub fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21441 unsafe {
21442 let ror: i32x16 = _mm512_rorv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, ror, i32x16::ZERO))
21444 }
21445}
21446
21447/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21448///
21449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi32&expand=4736)
21450#[inline]
21451#[target_feature(enable = "avx512f,avx512vl")]
21452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21453#[cfg_attr(test, assert_instr(vprorvd))]
21454pub fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vprorvd256(a.as_i32x8(), b.as_i32x8())) }
21456}
21457
21458/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21459///
21460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi32&expand=4734)
21461#[inline]
21462#[target_feature(enable = "avx512f,avx512vl")]
21463#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21464#[cfg_attr(test, assert_instr(vprorvd))]
21465pub fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21466 unsafe {
21467 let ror: i32x8 = _mm256_rorv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, ror, src.as_i32x8()))
21469 }
21470}
21471
21472/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21473///
21474/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi32&expand=4735)
21475#[inline]
21476#[target_feature(enable = "avx512f,avx512vl")]
21477#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21478#[cfg_attr(test, assert_instr(vprorvd))]
21479pub fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21480 unsafe {
21481 let ror: i32x8 = _mm256_rorv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, ror, i32x8::ZERO))
21483 }
21484}
21485
21486/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21487///
21488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi32&expand=4733)
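///
/// # Examples
///
/// A minimal usage sketch (illustrative values only; assumes an `x86_64`
/// host where both `avx512f` and `avx512vl` are detected at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///     unsafe {
///         let a = _mm_set_epi32(8, 4, 2, 1);
///         let b = _mm_set_epi32(3, 2, 1, 0);
///         // Each lane is rotated by its own per-lane count, so every lane becomes 1.
///         let r = _mm_rorv_epi32(a, b);
///         let mut out = [0i32; 4];
///         _mm_storeu_si128(out.as_mut_ptr().cast(), r);
///         assert_eq!(out, [1, 1, 1, 1]);
///     }
/// }
/// # }
/// ```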
21489#[inline]
21490#[target_feature(enable = "avx512f,avx512vl")]
21491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21492#[cfg_attr(test, assert_instr(vprorvd))]
21493pub fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vprorvd128(a.as_i32x4(), b.as_i32x4())) }
21495}
21496
21497/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21498///
21499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi32&expand=4731)
21500#[inline]
21501#[target_feature(enable = "avx512f,avx512vl")]
21502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21503#[cfg_attr(test, assert_instr(vprorvd))]
21504pub fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21505 unsafe {
21506 let ror: i32x4 = _mm_rorv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, ror, src.as_i32x4()))
21508 }
21509}
21510
21511/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21512///
21513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi32&expand=4732)
21514#[inline]
21515#[target_feature(enable = "avx512f,avx512vl")]
21516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21517#[cfg_attr(test, assert_instr(vprorvd))]
21518pub fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21519 unsafe {
21520 let ror: i32x4 = _mm_rorv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, ror, i32x4::ZERO))
21522 }
21523}
21524
21525/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21526///
21527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi64&expand=4712)
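///
/// # Examples
///
/// A minimal usage sketch (illustrative values only; assumes an `x86_64`
/// host where `avx512f` is detected at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         // Rotating the top bit left by one wraps it around to bit 0.
///         let a = _mm512_set1_epi64(i64::MIN);
///         let b = _mm512_set1_epi64(1);
///         let r = _mm512_rolv_epi64(a, b);
///         let mut out = [0i64; 8];
///         _mm512_storeu_si512(out.as_mut_ptr().cast(), r);
///         assert_eq!(out, [1; 8]);
///     }
/// }
/// # }
/// ```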
21528#[inline]
21529#[target_feature(enable = "avx512f")]
21530#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21531#[cfg_attr(test, assert_instr(vprolvq))]
21532pub fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vprolvq(a.as_i64x8(), b.as_i64x8())) }
21534}
21535
21536/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21537///
21538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi64&expand=4710)
21539#[inline]
21540#[target_feature(enable = "avx512f")]
21541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21542#[cfg_attr(test, assert_instr(vprolvq))]
21543pub fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21544 unsafe {
21545 let rol: i64x8 = _mm512_rolv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
21547 }
21548}
21549
21550/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21551///
21552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi64&expand=4711)
21553#[inline]
21554#[target_feature(enable = "avx512f")]
21555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21556#[cfg_attr(test, assert_instr(vprolvq))]
21557pub fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21558 unsafe {
21559 let rol: i64x8 = _mm512_rolv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, rol, i64x8::ZERO))
21561 }
21562}
21563
21564/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21565///
21566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi64&expand=4709)
21567#[inline]
21568#[target_feature(enable = "avx512f,avx512vl")]
21569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21570#[cfg_attr(test, assert_instr(vprolvq))]
21571pub fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vprolvq256(a.as_i64x4(), b.as_i64x4())) }
21573}
21574
21575/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21576///
21577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi64&expand=4707)
21578#[inline]
21579#[target_feature(enable = "avx512f,avx512vl")]
21580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21581#[cfg_attr(test, assert_instr(vprolvq))]
21582pub fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21583 unsafe {
21584 let rol: i64x4 = _mm256_rolv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, rol, src.as_i64x4()))
21586 }
21587}
21588
21589/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21590///
21591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi64&expand=4708)
21592#[inline]
21593#[target_feature(enable = "avx512f,avx512vl")]
21594#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21595#[cfg_attr(test, assert_instr(vprolvq))]
21596pub fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21597 unsafe {
21598 let rol: i64x4 = _mm256_rolv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, rol, i64x4::ZERO))
21600 }
21601}
21602
21603/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21604///
21605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi64&expand=4706)
21606#[inline]
21607#[target_feature(enable = "avx512f,avx512vl")]
21608#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21609#[cfg_attr(test, assert_instr(vprolvq))]
21610pub fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vprolvq128(a.as_i64x2(), b.as_i64x2())) }
21612}
21613
21614/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21615///
21616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi64&expand=4704)
21617#[inline]
21618#[target_feature(enable = "avx512f,avx512vl")]
21619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21620#[cfg_attr(test, assert_instr(vprolvq))]
21621pub fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21622 unsafe {
21623 let rol: i64x2 = _mm_rolv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, rol, src.as_i64x2()))
21625 }
21626}
21627
21628/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21629///
21630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi64&expand=4705)
21631#[inline]
21632#[target_feature(enable = "avx512f,avx512vl")]
21633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21634#[cfg_attr(test, assert_instr(vprolvq))]
21635pub fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21636 unsafe {
21637 let rol: i64x2 = _mm_rolv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, rol, i64x2::ZERO))
21639 }
21640}
21641
21642/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21643///
21644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi64&expand=4748)
21645#[inline]
21646#[target_feature(enable = "avx512f")]
21647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21648#[cfg_attr(test, assert_instr(vprorvq))]
21649pub fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) }
21651}
21652
21653/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21654///
21655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi64&expand=4746)
21656#[inline]
21657#[target_feature(enable = "avx512f")]
21658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21659#[cfg_attr(test, assert_instr(vprorvq))]
21660pub fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21661 unsafe {
21662 let ror: i64x8 = _mm512_rorv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
21664 }
21665}
21666
21667/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21668///
21669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi64&expand=4747)
21670#[inline]
21671#[target_feature(enable = "avx512f")]
21672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21673#[cfg_attr(test, assert_instr(vprorvq))]
21674pub fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21675 unsafe {
21676 let ror: i64x8 = _mm512_rorv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, ror, i64x8::ZERO))
21678 }
21679}
21680
21681/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21682///
21683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi64&expand=4745)
21684#[inline]
21685#[target_feature(enable = "avx512f,avx512vl")]
21686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21687#[cfg_attr(test, assert_instr(vprorvq))]
21688pub fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vprorvq256(a.as_i64x4(), b.as_i64x4())) }
21690}
21691
21692/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21693///
21694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi64&expand=4743)
21695#[inline]
21696#[target_feature(enable = "avx512f,avx512vl")]
21697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21698#[cfg_attr(test, assert_instr(vprorvq))]
21699pub fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21700 unsafe {
21701 let ror: i64x4 = _mm256_rorv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, ror, src.as_i64x4()))
21703 }
21704}
21705
21706/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21707///
21708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi64&expand=4744)
21709#[inline]
21710#[target_feature(enable = "avx512f,avx512vl")]
21711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21712#[cfg_attr(test, assert_instr(vprorvq))]
21713pub fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21714 unsafe {
21715 let ror: i64x4 = _mm256_rorv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, ror, i64x4::ZERO))
21717 }
21718}
21719
21720/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21721///
21722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi64&expand=4742)
21723#[inline]
21724#[target_feature(enable = "avx512f,avx512vl")]
21725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21726#[cfg_attr(test, assert_instr(vprorvq))]
21727pub fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vprorvq128(a.as_i64x2(), b.as_i64x2())) }
21729}
21730
21731/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21732///
21733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi64&expand=4740)
21734#[inline]
21735#[target_feature(enable = "avx512f,avx512vl")]
21736#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21737#[cfg_attr(test, assert_instr(vprorvq))]
21738pub fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21739 unsafe {
21740 let ror: i64x2 = _mm_rorv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, ror, src.as_i64x2()))
21742 }
21743}
21744
21745/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21746///
21747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi64&expand=4741)
21748#[inline]
21749#[target_feature(enable = "avx512f,avx512vl")]
21750#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21751#[cfg_attr(test, assert_instr(vprorvq))]
21752pub fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21753 unsafe {
21754 let ror: i64x2 = _mm_rorv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, ror, i64x2::ZERO))
21756 }
21757}
21758
21759/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21760///
21761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi32&expand=5342)
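///
/// # Examples
///
/// A minimal usage sketch (illustrative values only; assumes an `x86_64`
/// host where `avx512f` is detected at runtime). Per-lane counts of 32 or
/// more do not wrap; they zero the lane:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_epi32(1);
///         let count = _mm512_set_epi32(32, 32, 32, 32, 32, 32, 32, 32, 4, 4, 4, 4, 4, 4, 4, 4);
///         let r = _mm512_sllv_epi32(a, count);
///         let mut out = [0i32; 16];
///         _mm512_storeu_si512(out.as_mut_ptr().cast(), r);
///         assert_eq!(out[0], 16); // 1 << 4
///         assert_eq!(out[15], 0); // a count >= 32 shifts the lane out entirely
///     }
/// }
/// # }
/// ```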
21762#[inline]
21763#[target_feature(enable = "avx512f")]
21764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21765#[cfg_attr(test, assert_instr(vpsllvd))]
21766pub fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) }
21768}
21769
21770/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21771///
21772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi32&expand=5340)
21773#[inline]
21774#[target_feature(enable = "avx512f")]
21775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21776#[cfg_attr(test, assert_instr(vpsllvd))]
21777pub fn _mm512_mask_sllv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21778 unsafe {
21779 let shf: i32x16 = _mm512_sllv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21781 }
21782}
21783
21784/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21785///
21786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi32&expand=5341)
21787#[inline]
21788#[target_feature(enable = "avx512f")]
21789#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21790#[cfg_attr(test, assert_instr(vpsllvd))]
21791pub fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21792 unsafe {
21793 let shf: i32x16 = _mm512_sllv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21795 }
21796}
21797
21798/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21799///
21800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi32&expand=5337)
21801#[inline]
21802#[target_feature(enable = "avx512f,avx512vl")]
21803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21804#[cfg_attr(test, assert_instr(vpsllvd))]
21805pub fn _mm256_mask_sllv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21806 unsafe {
21807 let shf: i32x8 = _mm256_sllv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21809 }
21810}
21811
21812/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21813///
21814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi32&expand=5338)
21815#[inline]
21816#[target_feature(enable = "avx512f,avx512vl")]
21817#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21818#[cfg_attr(test, assert_instr(vpsllvd))]
21819pub fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21820 unsafe {
21821 let shf: i32x8 = _mm256_sllv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21823 }
21824}
21825
21826/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21827///
21828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi32&expand=5334)
21829#[inline]
21830#[target_feature(enable = "avx512f,avx512vl")]
21831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21832#[cfg_attr(test, assert_instr(vpsllvd))]
21833pub fn _mm_mask_sllv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21834 unsafe {
21835 let shf: i32x4 = _mm_sllv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21837 }
21838}
21839
21840/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21841///
21842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi32&expand=5335)
21843#[inline]
21844#[target_feature(enable = "avx512f,avx512vl")]
21845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21846#[cfg_attr(test, assert_instr(vpsllvd))]
21847pub fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21848 unsafe {
21849 let shf: i32x4 = _mm_sllv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21851 }
21852}
21853
21854/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21855///
21856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi32&expand=5554)
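///
/// # Examples
///
/// A minimal usage sketch (illustrative values only; assumes an `x86_64`
/// host where `avx512f` is detected at runtime). This is a logical shift,
/// so zeros are shifted in even when a lane's sign bit is set:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_epi32(-1);
///         let count = _mm512_set1_epi32(28);
///         let r = _mm512_srlv_epi32(a, count);
///         let mut out = [0i32; 16];
///         _mm512_storeu_si512(out.as_mut_ptr().cast(), r);
///         assert_eq!(out, [0xF; 16]);
///     }
/// }
/// # }
/// ```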
21857#[inline]
21858#[target_feature(enable = "avx512f")]
21859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21860#[cfg_attr(test, assert_instr(vpsrlvd))]
21861pub fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) }
21863}
21864
21865/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21866///
21867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi32&expand=5552)
21868#[inline]
21869#[target_feature(enable = "avx512f")]
21870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21871#[cfg_attr(test, assert_instr(vpsrlvd))]
21872pub fn _mm512_mask_srlv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21873 unsafe {
21874 let shf: i32x16 = _mm512_srlv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21876 }
21877}
21878
21879/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21880///
21881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi32&expand=5553)
21882#[inline]
21883#[target_feature(enable = "avx512f")]
21884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21885#[cfg_attr(test, assert_instr(vpsrlvd))]
21886pub fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21887 unsafe {
21888 let shf: i32x16 = _mm512_srlv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21890 }
21891}
21892
21893/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21894///
21895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi32&expand=5549)
21896#[inline]
21897#[target_feature(enable = "avx512f,avx512vl")]
21898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21899#[cfg_attr(test, assert_instr(vpsrlvd))]
21900pub fn _mm256_mask_srlv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21901 unsafe {
21902 let shf: i32x8 = _mm256_srlv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21904 }
21905}
21906
21907/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21908///
21909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi32&expand=5550)
21910#[inline]
21911#[target_feature(enable = "avx512f,avx512vl")]
21912#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21913#[cfg_attr(test, assert_instr(vpsrlvd))]
21914pub fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21915 unsafe {
21916 let shf: i32x8 = _mm256_srlv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21918 }
21919}
21920
21921/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21922///
21923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi32&expand=5546)
21924#[inline]
21925#[target_feature(enable = "avx512f,avx512vl")]
21926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21927#[cfg_attr(test, assert_instr(vpsrlvd))]
21928pub fn _mm_mask_srlv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21929 unsafe {
21930 let shf: i32x4 = _mm_srlv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21932 }
21933}
21934
21935/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21936///
21937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi32&expand=5547)
21938#[inline]
21939#[target_feature(enable = "avx512f,avx512vl")]
21940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21941#[cfg_attr(test, assert_instr(vpsrlvd))]
21942pub fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21943 unsafe {
21944 let shf: i32x4 = _mm_srlv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21946 }
21947}
21948
21949/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21950///
21951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi64&expand=5351)
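///
/// # Examples
///
/// A minimal usage sketch (illustrative values only; assumes an `x86_64`
/// host where `avx512f` is detected at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_epi64(1);
///         // Each 64-bit lane is shifted left by its own count.
///         let count = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///         let r = _mm512_sllv_epi64(a, count);
///         let mut out = [0i64; 8];
///         _mm512_storeu_si512(out.as_mut_ptr().cast(), r);
///         assert_eq!(out, [1, 2, 4, 8, 16, 32, 64, 128]);
///     }
/// }
/// # }
/// ```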
21952#[inline]
21953#[target_feature(enable = "avx512f")]
21954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21955#[cfg_attr(test, assert_instr(vpsllvq))]
21956pub fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) }
21958}
21959
21960/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21961///
21962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi64&expand=5349)
21963#[inline]
21964#[target_feature(enable = "avx512f")]
21965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21966#[cfg_attr(test, assert_instr(vpsllvq))]
21967pub fn _mm512_mask_sllv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21968 unsafe {
21969 let shf: i64x8 = _mm512_sllv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21971 }
21972}
21973
21974/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21975///
21976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi64&expand=5350)
21977#[inline]
21978#[target_feature(enable = "avx512f")]
21979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21980#[cfg_attr(test, assert_instr(vpsllvq))]
21981pub fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21982 unsafe {
21983 let shf: i64x8 = _mm512_sllv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21985 }
21986}
21987
21988/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21989///
21990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi64&expand=5346)
21991#[inline]
21992#[target_feature(enable = "avx512f,avx512vl")]
21993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21994#[cfg_attr(test, assert_instr(vpsllvq))]
21995pub fn _mm256_mask_sllv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21996 unsafe {
21997 let shf: i64x4 = _mm256_sllv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21999 }
22000}
22001
22002/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22003///
22004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi64&expand=5347)
22005#[inline]
22006#[target_feature(enable = "avx512f,avx512vl")]
22007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22008#[cfg_attr(test, assert_instr(vpsllvq))]
22009pub fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22010 unsafe {
22011 let shf: i64x4 = _mm256_sllv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
22013 }
22014}
22015
22016/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22017///
22018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi64&expand=5343)
22019#[inline]
22020#[target_feature(enable = "avx512f,avx512vl")]
22021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22022#[cfg_attr(test, assert_instr(vpsllvq))]
22023pub fn _mm_mask_sllv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22024 unsafe {
22025 let shf: i64x2 = _mm_sllv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
22027 }
22028}
22029
22030/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22031///
22032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi64&expand=5344)
22033#[inline]
22034#[target_feature(enable = "avx512f,avx512vl")]
22035#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22036#[cfg_attr(test, assert_instr(vpsllvq))]
22037pub fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22038 unsafe {
22039 let shf: i64x2 = _mm_sllv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
22041 }
22042}
22043
22044/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
22045///
22046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi64&expand=5563)
22047#[inline]
22048#[target_feature(enable = "avx512f")]
22049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22050#[cfg_attr(test, assert_instr(vpsrlvq))]
22051pub fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) }
22053}
22054
22055/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22056///
22057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi64&expand=5561)
22058#[inline]
22059#[target_feature(enable = "avx512f")]
22060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22061#[cfg_attr(test, assert_instr(vpsrlvq))]
22062pub fn _mm512_mask_srlv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22063 unsafe {
22064 let shf: i64x8 = _mm512_srlv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
22066 }
22067}
22068
22069/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22070///
22071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi64&expand=5562)
22072#[inline]
22073#[target_feature(enable = "avx512f")]
22074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22075#[cfg_attr(test, assert_instr(vpsrlvq))]
22076pub fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22077 unsafe {
22078 let shf: i64x8 = _mm512_srlv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
22080 }
22081}
22082
22083/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22084///
22085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi64&expand=5558)
22086#[inline]
22087#[target_feature(enable = "avx512f,avx512vl")]
22088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22089#[cfg_attr(test, assert_instr(vpsrlvq))]
22090pub fn _mm256_mask_srlv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22091 unsafe {
22092 let shf: i64x4 = _mm256_srlv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
22094 }
22095}
22096
22097/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22098///
22099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi64&expand=5559)
22100#[inline]
22101#[target_feature(enable = "avx512f,avx512vl")]
22102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22103#[cfg_attr(test, assert_instr(vpsrlvq))]
22104pub fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22105 unsafe {
22106 let shf: i64x4 = _mm256_srlv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
22108 }
22109}
22110
22111/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22112///
22113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi64&expand=5555)
22114#[inline]
22115#[target_feature(enable = "avx512f,avx512vl")]
22116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22117#[cfg_attr(test, assert_instr(vpsrlvq))]
22118pub fn _mm_mask_srlv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22119 unsafe {
22120 let shf: i64x2 = _mm_srlv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
22122 }
22123}
22124
22125/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22126///
22127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi64&expand=5556)
22128#[inline]
22129#[target_feature(enable = "avx512f,avx512vl")]
22130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22131#[cfg_attr(test, assert_instr(vpsrlvq))]
22132pub fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22133 unsafe {
22134 let shf: i64x2 = _mm_srlv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
22136 }
22137}
22138
22139/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
22140///
22141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_ps&expand=4170)
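///
/// # Examples
///
/// A minimal usage sketch (illustrative values only; assumes an `x86_64`
/// host where `avx512f` is detected at runtime). Each 2-bit field of the
/// control, read from the low bits upwards, selects one element of the
/// 128-bit lane that the corresponding output element lives in:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_setr_ps(
///             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
///         );
///         // 0b00_01_10_11 reverses the four elements of every 128-bit lane.
///         let r = _mm512_permute_ps::<0b00_01_10_11>(a);
///         let mut out = [0.0f32; 16];
///         _mm512_storeu_ps(out.as_mut_ptr(), r);
///         assert_eq!(out[0], 3.0); // lane 0 took element 3 of its 128-bit group
///         assert_eq!(out[4], 7.0); // lane 4 took element 3 of the next group
///     }
/// }
/// # }
/// ```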
22142#[inline]
22143#[target_feature(enable = "avx512f")]
22144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22145#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22146#[rustc_legacy_const_generics(1)]
22147pub fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
22148 unsafe {
22149 static_assert_uimm_bits!(MASK, 8);
22150 simd_shuffle!(
22151 a,
22152 a,
22153 [
22154 MASK as u32 & 0b11,
22155 (MASK as u32 >> 2) & 0b11,
22156 ((MASK as u32 >> 4) & 0b11),
22157 ((MASK as u32 >> 6) & 0b11),
22158 (MASK as u32 & 0b11) + 4,
22159 ((MASK as u32 >> 2) & 0b11) + 4,
22160 ((MASK as u32 >> 4) & 0b11) + 4,
22161 ((MASK as u32 >> 6) & 0b11) + 4,
22162 (MASK as u32 & 0b11) + 8,
22163 ((MASK as u32 >> 2) & 0b11) + 8,
22164 ((MASK as u32 >> 4) & 0b11) + 8,
22165 ((MASK as u32 >> 6) & 0b11) + 8,
22166 (MASK as u32 & 0b11) + 12,
22167 ((MASK as u32 >> 2) & 0b11) + 12,
22168 ((MASK as u32 >> 4) & 0b11) + 12,
22169 ((MASK as u32 >> 6) & 0b11) + 12,
22170 ],
22171 )
22172 }
22173}
22174
22175/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22176///
22177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_ps&expand=4168)
22178#[inline]
22179#[target_feature(enable = "avx512f")]
22180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22181#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22182#[rustc_legacy_const_generics(3)]
22183pub fn _mm512_mask_permute_ps<const MASK: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
22184 unsafe {
22185 static_assert_uimm_bits!(MASK, 8);
22186 let r: __m512 = _mm512_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
22188 }
22189}
22190
22191/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22192///
22193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_ps&expand=4169)
22194#[inline]
22195#[target_feature(enable = "avx512f")]
22196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22197#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22198#[rustc_legacy_const_generics(2)]
22199pub fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 {
22200 unsafe {
22201 static_assert_uimm_bits!(MASK, 8);
22202 let r: __m512 = _mm512_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
22204 }
22205}
22206
22207/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22208///
22209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_ps&expand=4165)
22210#[inline]
22211#[target_feature(enable = "avx512f,avx512vl")]
22212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22213#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22214#[rustc_legacy_const_generics(3)]
22215pub fn _mm256_mask_permute_ps<const MASK: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
22216 unsafe {
22217 let r: __m256 = _mm256_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
22219 }
22220}
22221
22222/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22223///
22224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_ps&expand=4166)
22225#[inline]
22226#[target_feature(enable = "avx512f,avx512vl")]
22227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22228#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22229#[rustc_legacy_const_generics(2)]
22230pub fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 {
22231 unsafe {
22232 let r: __m256 = _mm256_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
22234 }
22235}
22236
22237/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22238///
22239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_ps&expand=4162)
22240#[inline]
22241#[target_feature(enable = "avx512f,avx512vl")]
22242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22243#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22244#[rustc_legacy_const_generics(3)]
22245pub fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
22246 unsafe {
22247 let r: __m128 = _mm_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
22249 }
22250}
22251
22252/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22253///
22254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_ps&expand=4163)
22255#[inline]
22256#[target_feature(enable = "avx512f,avx512vl")]
22257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22258#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22259#[rustc_legacy_const_generics(2)]
22260pub fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 {
22261 unsafe {
22262 let r: __m128 = _mm_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
22264 }
22265}
22266
22267/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
22268///
22269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_pd&expand=4161)
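///
/// # Examples
///
/// A minimal usage sketch (illustrative values only; assumes an `x86_64`
/// host where `avx512f` is detected at runtime). Bit `i` of the control
/// picks the low (0) or high (1) element of the 128-bit pair that output
/// element `i` belongs to:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
///         // 0b0101_0101 swaps the two elements of every 128-bit pair.
///         let r = _mm512_permute_pd::<0b0101_0101>(a);
///         let mut out = [0.0f64; 8];
///         _mm512_storeu_pd(out.as_mut_ptr(), r);
///         assert_eq!(out, [1., 0., 3., 2., 5., 4., 7., 6.]);
///     }
/// }
/// # }
/// ```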
22270#[inline]
22271#[target_feature(enable = "avx512f")]
22272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22273#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22274#[rustc_legacy_const_generics(1)]
22275pub fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
22276 unsafe {
22277 static_assert_uimm_bits!(MASK, 8);
22278 simd_shuffle!(
22279 a,
22280 a,
22281 [
22282 MASK as u32 & 0b1,
22283 ((MASK as u32 >> 1) & 0b1),
22284 ((MASK as u32 >> 2) & 0b1) + 2,
22285 ((MASK as u32 >> 3) & 0b1) + 2,
22286 ((MASK as u32 >> 4) & 0b1) + 4,
22287 ((MASK as u32 >> 5) & 0b1) + 4,
22288 ((MASK as u32 >> 6) & 0b1) + 6,
22289 ((MASK as u32 >> 7) & 0b1) + 6,
22290 ],
22291 )
22292 }
22293}
22294
22295/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22296///
22297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_pd&expand=4159)
22298#[inline]
22299#[target_feature(enable = "avx512f")]
22300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22301#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22302#[rustc_legacy_const_generics(3)]
22303pub fn _mm512_mask_permute_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
22304 unsafe {
22305 static_assert_uimm_bits!(MASK, 8);
22306 let r: __m512d = _mm512_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
22308 }
22309}
22310
22311/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22312///
22313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_pd&expand=4160)
22314#[inline]
22315#[target_feature(enable = "avx512f")]
22316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22317#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22318#[rustc_legacy_const_generics(2)]
22319pub fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
22320 unsafe {
22321 static_assert_uimm_bits!(MASK, 8);
22322 let r: __m512d = _mm512_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
22324 }
22325}
22326
22327/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22328///
22329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_pd&expand=4156)
22330#[inline]
22331#[target_feature(enable = "avx512f,avx512vl")]
22332#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22333#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
22334#[rustc_legacy_const_generics(3)]
22335pub fn _mm256_mask_permute_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
22336 unsafe {
22337 static_assert_uimm_bits!(MASK, 4);
22338 let r: __m256d = _mm256_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
22340 }
22341}
22342
22343/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22344///
22345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_pd&expand=4157)
22346#[inline]
22347#[target_feature(enable = "avx512f,avx512vl")]
22348#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22349#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
22350#[rustc_legacy_const_generics(2)]
22351pub fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
22352 unsafe {
22353 static_assert_uimm_bits!(MASK, 4);
22354 let r: __m256d = _mm256_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
22356 }
22357}
22358
22359/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22360///
22361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_pd&expand=4153)
22362#[inline]
22363#[target_feature(enable = "avx512f,avx512vl")]
22364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22365#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
22366#[rustc_legacy_const_generics(3)]
22367pub fn _mm_mask_permute_pd<const IMM2: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
22368 unsafe {
22369 static_assert_uimm_bits!(IMM2, 2);
22370 let r: __m128d = _mm_permute_pd::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
22372 }
22373}
22374
22375/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22376///
22377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_pd&expand=4154)
22378#[inline]
22379#[target_feature(enable = "avx512f,avx512vl")]
22380#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22381#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
22382#[rustc_legacy_const_generics(2)]
22383pub fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d {
22384 unsafe {
22385 static_assert_uimm_bits!(IMM2, 2);
22386 let r: __m128d = _mm_permute_pd::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
22388 }
22389}
22390
22391/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
22392///
22393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_epi64&expand=4208)
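///
/// # Examples
///
/// A minimal usage sketch (illustrative values only; assumes an `x86_64`
/// host where `avx512f` is detected at runtime). Each 2-bit field of the
/// control selects one of the four 64-bit elements within the same 256-bit
/// half of the vector:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///         // 0b00_01_10_11 reverses the four elements of each 256-bit half.
///         let r = _mm512_permutex_epi64::<0b00_01_10_11>(a);
///         let mut out = [0i64; 8];
///         _mm512_storeu_si512(out.as_mut_ptr().cast(), r);
///         assert_eq!(out, [3, 2, 1, 0, 7, 6, 5, 4]);
///     }
/// }
/// # }
/// ```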
22394#[inline]
22395#[target_feature(enable = "avx512f")]
22396#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22397#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22398#[rustc_legacy_const_generics(1)]
22399pub fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
22400 unsafe {
22401 static_assert_uimm_bits!(MASK, 8);
22402 simd_shuffle!(
22403 a,
22404 a,
22405 [
22406 MASK as u32 & 0b11,
22407 (MASK as u32 >> 2) & 0b11,
22408 ((MASK as u32 >> 4) & 0b11),
22409 ((MASK as u32 >> 6) & 0b11),
22410 (MASK as u32 & 0b11) + 4,
22411 ((MASK as u32 >> 2) & 0b11) + 4,
22412 ((MASK as u32 >> 4) & 0b11) + 4,
22413 ((MASK as u32 >> 6) & 0b11) + 4,
22414 ],
22415 )
22416 }
22417}
22418
22419/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22420///
22421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_epi64&expand=4206)
22422#[inline]
22423#[target_feature(enable = "avx512f")]
22424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22425#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22426#[rustc_legacy_const_generics(3)]
22427pub fn _mm512_mask_permutex_epi64<const MASK: i32>(
22428 src: __m512i,
22429 k: __mmask8,
22430 a: __m512i,
22431) -> __m512i {
22432 unsafe {
22433 static_assert_uimm_bits!(MASK, 8);
22434 let r: __m512i = _mm512_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
22436 }
22437}
22438
22439/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22440///
22441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_epi64&expand=4207)
22442#[inline]
22443#[target_feature(enable = "avx512f")]
22444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22445#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22446#[rustc_legacy_const_generics(2)]
22447pub fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i {
22448 unsafe {
22449 static_assert_uimm_bits!(MASK, 8);
22450 let r: __m512i = _mm512_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
22452 }
22453}
22454
22455/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
22456///
22457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_epi64&expand=4205)
22458#[inline]
22459#[target_feature(enable = "avx512f,avx512vl")]
22460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22461#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22462#[rustc_legacy_const_generics(1)]
22463pub fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
22464 unsafe {
22465 static_assert_uimm_bits!(MASK, 8);
22466 simd_shuffle!(
22467 a,
22468 a,
22469 [
22470 MASK as u32 & 0b11,
22471 (MASK as u32 >> 2) & 0b11,
22472 ((MASK as u32 >> 4) & 0b11),
22473 ((MASK as u32 >> 6) & 0b11),
22474 ],
22475 )
22476 }
22477}
22478
22479/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22480///
22481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_epi64&expand=4203)
22482#[inline]
22483#[target_feature(enable = "avx512f,avx512vl")]
22484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22485#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22486#[rustc_legacy_const_generics(3)]
22487pub fn _mm256_mask_permutex_epi64<const MASK: i32>(
22488 src: __m256i,
22489 k: __mmask8,
22490 a: __m256i,
22491) -> __m256i {
22492 unsafe {
22493 static_assert_uimm_bits!(MASK, 8);
22494 let r: __m256i = _mm256_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
22496 }
22497}
22498
22499/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22500///
22501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_epi64&expand=4204)
22502#[inline]
22503#[target_feature(enable = "avx512f,avx512vl")]
22504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22505#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22506#[rustc_legacy_const_generics(2)]
22507pub fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i {
22508 unsafe {
22509 static_assert_uimm_bits!(MASK, 8);
22510 let r: __m256i = _mm256_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
22512 }
22513}
22514
22515/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
22516///
22517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_pd&expand=4214)
22518#[inline]
22519#[target_feature(enable = "avx512f")]
22520#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22521#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22522#[rustc_legacy_const_generics(1)]
22523pub fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
22524 unsafe {
22525 static_assert_uimm_bits!(MASK, 8);
22526 simd_shuffle!(
22527 a,
22528 a,
22529 [
22530 MASK as u32 & 0b11,
22531 (MASK as u32 >> 2) & 0b11,
22532 ((MASK as u32 >> 4) & 0b11),
22533 ((MASK as u32 >> 6) & 0b11),
22534 (MASK as u32 & 0b11) + 4,
22535 ((MASK as u32 >> 2) & 0b11) + 4,
22536 ((MASK as u32 >> 4) & 0b11) + 4,
22537 ((MASK as u32 >> 6) & 0b11) + 4,
22538 ],
22539 )
22540 }
22541}
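// Usage sketch (illustrative, not from the original source): the same
// two-bits-per-lane encoding as the integer form, applied to doubles; the
// constant and values are assumptions.
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     // 0b01_00_11_10 reads lanes 2, 3, 0, 1 of each 256-bit half, so `r`
//     // holds [2.0, 3.0, 0.0, 1.0, 6.0, 7.0, 4.0, 5.0].
//     let r = _mm512_permutex_pd::<0b01_00_11_10>(a);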
22542
22543/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22544///
22545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_pd&expand=4212)
22546#[inline]
22547#[target_feature(enable = "avx512f")]
22548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22549#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22550#[rustc_legacy_const_generics(3)]
22551pub fn _mm512_mask_permutex_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
22552 unsafe {
22553 let r: __m512d = _mm512_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
22555 }
22556}
22557
22558/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22559///
22560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_pd&expand=4213)
22561#[inline]
22562#[target_feature(enable = "avx512f")]
22563#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22564#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22565#[rustc_legacy_const_generics(2)]
22566pub fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
22567 unsafe {
22568 let r: __m512d = _mm512_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
22570 }
22571}
22572
22573/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
22574///
22575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_pd&expand=4211)
22576#[inline]
22577#[target_feature(enable = "avx512f,avx512vl")]
22578#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22579#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22580#[rustc_legacy_const_generics(1)]
22581pub fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
22582 unsafe {
22583 static_assert_uimm_bits!(MASK, 8);
22584 simd_shuffle!(
22585 a,
22586 a,
22587 [
22588 MASK as u32 & 0b11,
22589 (MASK as u32 >> 2) & 0b11,
22590 ((MASK as u32 >> 4) & 0b11),
22591 ((MASK as u32 >> 6) & 0b11),
22592 ],
22593 )
22594 }
22595}
22596
22597/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22598///
22599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_pd&expand=4209)
22600#[inline]
22601#[target_feature(enable = "avx512f,avx512vl")]
22602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22603#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22604#[rustc_legacy_const_generics(3)]
22605pub fn _mm256_mask_permutex_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
22606 unsafe {
22607 static_assert_uimm_bits!(MASK, 8);
22608 let r: __m256d = _mm256_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
22610 }
22611}
22612
22613/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22614///
22615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_pd&expand=4210)
22616#[inline]
22617#[target_feature(enable = "avx512f,avx512vl")]
22618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22619#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22620#[rustc_legacy_const_generics(2)]
22621pub fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
22622 unsafe {
22623 static_assert_uimm_bits!(MASK, 8);
22624 let r: __m256d = _mm256_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
22626 }
22627}
22628
22629/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
22630///
22631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_epi32&expand=4182)
22632#[inline]
22633#[target_feature(enable = "avx512f")]
22634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22635#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22636pub fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
22638}
22639
22640/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
22641///
22642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_epi32&expand=4181)
22643#[inline]
22644#[target_feature(enable = "avx512f")]
22645#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22646#[cfg_attr(test, assert_instr(vpermd))]
22647pub fn _mm512_mask_permutevar_epi32(
22648 src: __m512i,
22649 k: __mmask16,
22650 idx: __m512i,
22651 a: __m512i,
22652) -> __m512i {
22653 unsafe {
22654 let permute: i32x16 = _mm512_permutevar_epi32(idx, a).as_i32x16();
        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
22656 }
22657}
22658
22659/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
22660///
22661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_ps&expand=4200)
22662#[inline]
22663#[target_feature(enable = "avx512f")]
22664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22665#[cfg_attr(test, assert_instr(vpermilps))]
22666pub fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
    unsafe { transmute(vpermilps(a.as_f32x16(), b.as_i32x16())) }
22668}
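// Usage sketch (illustrative, not from the original source): only bits 1:0 of
// each 32-bit control element are used, and selection never crosses a 128-bit
// lane. The values are assumptions.
//
//     let a = _mm512_setr_ps(
//         0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
//         8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
//     );
//     let ctrl = _mm512_set1_epi32(3);
//     // Every element becomes element 3 of its own 128-bit lane:
//     // [3.0, 3.0, 3.0, 3.0, 7.0, ..., 15.0, 15.0, 15.0, 15.0].
//     let r = _mm512_permutevar_ps(a, ctrl);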
22669
22670/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22671///
22672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_ps&expand=4198)
22673#[inline]
22674#[target_feature(enable = "avx512f")]
22675#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22676#[cfg_attr(test, assert_instr(vpermilps))]
22677pub fn _mm512_mask_permutevar_ps(src: __m512, k: __mmask16, a: __m512, b: __m512i) -> __m512 {
22678 unsafe {
22679 let permute: f32x16 = _mm512_permutevar_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
22681 }
22682}
22683
22684/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22685///
22686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_ps&expand=4199)
22687#[inline]
22688#[target_feature(enable = "avx512f")]
22689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22690#[cfg_attr(test, assert_instr(vpermilps))]
22691pub fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
22692 unsafe {
22693 let permute: f32x16 = _mm512_permutevar_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
22695 }
22696}
22697
22698/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22699///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_ps&expand=4195)
22701#[inline]
22702#[target_feature(enable = "avx512f,avx512vl")]
22703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22704#[cfg_attr(test, assert_instr(vpermilps))]
22705pub fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 {
22706 unsafe {
22707 let permute: f32x8 = _mm256_permutevar_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
22709 }
22710}
22711
22712/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22713///
22714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_ps&expand=4196)
22715#[inline]
22716#[target_feature(enable = "avx512f,avx512vl")]
22717#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22718#[cfg_attr(test, assert_instr(vpermilps))]
22719pub fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 {
22720 unsafe {
22721 let permute: f32x8 = _mm256_permutevar_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
22723 }
22724}
22725
22726/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22727///
22728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_ps&expand=4192)
22729#[inline]
22730#[target_feature(enable = "avx512f,avx512vl")]
22731#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22732#[cfg_attr(test, assert_instr(vpermilps))]
22733pub fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 {
22734 unsafe {
22735 let permute: f32x4 = _mm_permutevar_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, src.as_f32x4()))
22737 }
22738}
22739
22740/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22741///
22742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_ps&expand=4193)
22743#[inline]
22744#[target_feature(enable = "avx512f,avx512vl")]
22745#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22746#[cfg_attr(test, assert_instr(vpermilps))]
22747pub fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 {
22748 unsafe {
22749 let permute: f32x4 = _mm_permutevar_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
22751 }
22752}
22753
22754/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
22755///
22756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_pd&expand=4191)
22757#[inline]
22758#[target_feature(enable = "avx512f")]
22759#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22760#[cfg_attr(test, assert_instr(vpermilpd))]
22761pub fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
    unsafe { transmute(vpermilpd(a.as_f64x8(), b.as_i64x8())) }
22763}
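// Usage sketch (illustrative, not from the original source): for the `pd`
// form, bit 1 (not bit 0) of each 64-bit control element selects the lower (0)
// or upper (1) double of the same 128-bit lane. The values are assumptions.
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     let ctrl = _mm512_set1_epi64(0b10);
//     // Every element becomes the upper double of its lane:
//     // [1.0, 1.0, 3.0, 3.0, 5.0, 5.0, 7.0, 7.0].
//     let r = _mm512_permutevar_pd(a, ctrl);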
22764
22765/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22766///
22767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_pd&expand=4189)
22768#[inline]
22769#[target_feature(enable = "avx512f")]
22770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22771#[cfg_attr(test, assert_instr(vpermilpd))]
22772pub fn _mm512_mask_permutevar_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
22773 unsafe {
22774 let permute: f64x8 = _mm512_permutevar_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
22776 }
22777}
22778
22779/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22780///
22781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_pd&expand=4190)
22782#[inline]
22783#[target_feature(enable = "avx512f")]
22784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22785#[cfg_attr(test, assert_instr(vpermilpd))]
22786pub fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
22787 unsafe {
22788 let permute: f64x8 = _mm512_permutevar_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
22790 }
22791}
22792
22793/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22794///
22795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_pd&expand=4186)
22796#[inline]
22797#[target_feature(enable = "avx512f,avx512vl")]
22798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22799#[cfg_attr(test, assert_instr(vpermilpd))]
22800pub fn _mm256_mask_permutevar_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
22801 unsafe {
22802 let permute: f64x4 = _mm256_permutevar_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
22804 }
22805}
22806
22807/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22808///
22809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_pd&expand=4187)
22810#[inline]
22811#[target_feature(enable = "avx512f,avx512vl")]
22812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22813#[cfg_attr(test, assert_instr(vpermilpd))]
22814pub fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
22815 unsafe {
22816 let permute: f64x4 = _mm256_permutevar_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
22818 }
22819}
22820
22821/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22822///
22823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_pd&expand=4183)
22824#[inline]
22825#[target_feature(enable = "avx512f,avx512vl")]
22826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22827#[cfg_attr(test, assert_instr(vpermilpd))]
22828pub fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
22829 unsafe {
22830 let permute: f64x2 = _mm_permutevar_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, src.as_f64x2()))
22832 }
22833}
22834
22835/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22836///
22837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_pd&expand=4184)
22838#[inline]
22839#[target_feature(enable = "avx512f,avx512vl")]
22840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22841#[cfg_attr(test, assert_instr(vpermilpd))]
22842pub fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
22843 unsafe {
22844 let permute: f64x2 = _mm_permutevar_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
22846 }
22847}
22848
22849/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22850///
22851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi32&expand=4301)
22852#[inline]
22853#[target_feature(enable = "avx512f")]
22854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22855#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22856pub fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
22858}
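// Usage sketch (illustrative, not from the original source): unlike the
// `permutevar` family, this permute crosses all 128-bit lanes; only the low
// four bits of each index are used. The values are assumptions.
//
//     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     let idx = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
//     // `r` is `a` reversed: [15, 14, ..., 1, 0].
//     let r = _mm512_permutexvar_epi32(idx, a);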
22859
22860/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22861///
22862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi32&expand=4299)
22863#[inline]
22864#[target_feature(enable = "avx512f")]
22865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22866#[cfg_attr(test, assert_instr(vpermd))]
22867pub fn _mm512_mask_permutexvar_epi32(
22868 src: __m512i,
22869 k: __mmask16,
22870 idx: __m512i,
22871 a: __m512i,
22872) -> __m512i {
22873 unsafe {
22874 let permute: i32x16 = _mm512_permutexvar_epi32(idx, a).as_i32x16();
        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
22876 }
22877}
22878
22879/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22880///
22881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi32&expand=4300)
22882#[inline]
22883#[target_feature(enable = "avx512f")]
22884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22885#[cfg_attr(test, assert_instr(vpermd))]
22886pub fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
22887 unsafe {
22888 let permute: i32x16 = _mm512_permutexvar_epi32(idx, a).as_i32x16();
        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
22890 }
22891}
22892
22893/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22894///
22895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi32&expand=4298)
22896#[inline]
22897#[target_feature(enable = "avx512f,avx512vl")]
22898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22899#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22900pub fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i {
    _mm256_permutevar8x32_epi32(a, idx) // LLVM uses llvm.x86.avx2.permd
22902}
22903
22904/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22905///
22906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi32&expand=4296)
22907#[inline]
22908#[target_feature(enable = "avx512f,avx512vl")]
22909#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22910#[cfg_attr(test, assert_instr(vpermd))]
22911pub fn _mm256_mask_permutexvar_epi32(
22912 src: __m256i,
22913 k: __mmask8,
22914 idx: __m256i,
22915 a: __m256i,
22916) -> __m256i {
22917 unsafe {
22918 let permute: i32x8 = _mm256_permutexvar_epi32(idx, a).as_i32x8();
        transmute(simd_select_bitmask(k, permute, src.as_i32x8()))
22920 }
22921}
22922
22923/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22924///
22925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi32&expand=4297)
22926#[inline]
22927#[target_feature(enable = "avx512f,avx512vl")]
22928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22929#[cfg_attr(test, assert_instr(vpermd))]
22930pub fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
22931 unsafe {
22932 let permute: i32x8 = _mm256_permutexvar_epi32(idx, a).as_i32x8();
        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
22934 }
22935}
22936
22937/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22938///
22939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi64&expand=4307)
22940#[inline]
22941#[target_feature(enable = "avx512f")]
22942#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22943#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
22944pub fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermq(a.as_i64x8(), idx.as_i64x8())) }
22946}
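// Usage sketch (illustrative, not from the original source): only the low
// three bits of each 64-bit index are used, so an all-3 index vector
// broadcasts lane 3. The values are assumptions.
//
//     let a = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
//     let idx = _mm512_set1_epi64(3);
//     // `r` holds [13, 13, 13, 13, 13, 13, 13, 13].
//     let r = _mm512_permutexvar_epi64(idx, a);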
22947
22948/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22949///
22950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi64&expand=4305)
22951#[inline]
22952#[target_feature(enable = "avx512f")]
22953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22954#[cfg_attr(test, assert_instr(vpermq))]
22955pub fn _mm512_mask_permutexvar_epi64(
22956 src: __m512i,
22957 k: __mmask8,
22958 idx: __m512i,
22959 a: __m512i,
22960) -> __m512i {
22961 unsafe {
22962 let permute: i64x8 = _mm512_permutexvar_epi64(idx, a).as_i64x8();
        transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
22964 }
22965}
22966
22967/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22968///
22969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi64&expand=4306)
22970#[inline]
22971#[target_feature(enable = "avx512f")]
22972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22973#[cfg_attr(test, assert_instr(vpermq))]
22974pub fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
22975 unsafe {
22976 let permute: i64x8 = _mm512_permutexvar_epi64(idx, a).as_i64x8();
        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
22978 }
22979}
22980
22981/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22982///
22983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi64&expand=4304)
22984#[inline]
22985#[target_feature(enable = "avx512f,avx512vl")]
22986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22987#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
22988pub fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i {
    unsafe { transmute(vpermq256(a.as_i64x4(), idx.as_i64x4())) }
22990}
22991
22992/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22993///
22994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi64&expand=4302)
22995#[inline]
22996#[target_feature(enable = "avx512f,avx512vl")]
22997#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22998#[cfg_attr(test, assert_instr(vpermq))]
22999pub fn _mm256_mask_permutexvar_epi64(
23000 src: __m256i,
23001 k: __mmask8,
23002 idx: __m256i,
23003 a: __m256i,
23004) -> __m256i {
23005 unsafe {
23006 let permute: i64x4 = _mm256_permutexvar_epi64(idx, a).as_i64x4();
        transmute(simd_select_bitmask(k, permute, src.as_i64x4()))
23008 }
23009}
23010
23011/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23012///
23013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi64&expand=4303)
23014#[inline]
23015#[target_feature(enable = "avx512f,avx512vl")]
23016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23017#[cfg_attr(test, assert_instr(vpermq))]
23018pub fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
23019 unsafe {
23020 let permute: i64x4 = _mm256_permutexvar_epi64(idx, a).as_i64x4();
        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
23022 }
23023}
23024
/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23026///
23027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_ps&expand=4200)
23028#[inline]
23029#[target_feature(enable = "avx512f")]
23030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23031#[cfg_attr(test, assert_instr(vpermps))]
23032pub fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
    unsafe { transmute(vpermps(a.as_f32x16(), idx.as_i32x16())) }
23034}
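// Usage sketch (illustrative, not from the original source): a full-width
// gather of single-precision lanes by index, here separating even- and
// odd-indexed elements. The values are assumptions.
//
//     let a = _mm512_setr_ps(
//         0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
//         8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
//     );
//     let idx = _mm512_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
//     // `r` holds the even-indexed elements followed by the odd-indexed ones.
//     let r = _mm512_permutexvar_ps(idx, a);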
23035
23036/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23037///
23038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_ps&expand=4326)
23039#[inline]
23040#[target_feature(enable = "avx512f")]
23041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23042#[cfg_attr(test, assert_instr(vpermps))]
23043pub fn _mm512_mask_permutexvar_ps(src: __m512, k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
23044 unsafe {
23045 let permute: f32x16 = _mm512_permutexvar_ps(idx, a).as_f32x16();
        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
23047 }
23048}
23049
23050/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23051///
23052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_ps&expand=4327)
23053#[inline]
23054#[target_feature(enable = "avx512f")]
23055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23056#[cfg_attr(test, assert_instr(vpermps))]
23057pub fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
23058 unsafe {
23059 let permute: f32x16 = _mm512_permutexvar_ps(idx, a).as_f32x16();
        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
23061 }
23062}
23063
/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23065///
23066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_ps&expand=4325)
23067#[inline]
23068#[target_feature(enable = "avx512f,avx512vl")]
23069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23070#[cfg_attr(test, assert_instr(vpermps))]
23071pub fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 {
23072 _mm256_permutevar8x32_ps(a, idx) //llvm.x86.avx2.permps
23073}
23074
23075/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23076///
23077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_ps&expand=4323)
23078#[inline]
23079#[target_feature(enable = "avx512f,avx512vl")]
23080#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23081#[cfg_attr(test, assert_instr(vpermps))]
23082pub fn _mm256_mask_permutexvar_ps(src: __m256, k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23083 unsafe {
23084 let permute: f32x8 = _mm256_permutexvar_ps(idx, a).as_f32x8();
        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
23086 }
23087}
23088
23089/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23090///
23091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_ps&expand=4324)
23092#[inline]
23093#[target_feature(enable = "avx512f,avx512vl")]
23094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23095#[cfg_attr(test, assert_instr(vpermps))]
23096pub fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23097 unsafe {
23098 let permute: f32x8 = _mm256_permutexvar_ps(idx, a).as_f32x8();
        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23100 }
23101}
23102
23103/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23104///
23105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_pd&expand=4322)
23106#[inline]
23107#[target_feature(enable = "avx512f")]
23108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23109#[cfg_attr(test, assert_instr(vpermpd))]
23110pub fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
    unsafe { transmute(vpermpd(a.as_f64x8(), idx.as_i64x8())) }
23112}
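// Usage sketch (illustrative, not from the original source): rotating the
// eight doubles left by one lane. The values are assumptions.
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     let idx = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 0);
//     // `r` holds [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0].
//     let r = _mm512_permutexvar_pd(idx, a);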
23113
23114/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23115///
23116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_pd&expand=4320)
23117#[inline]
23118#[target_feature(enable = "avx512f")]
23119#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23120#[cfg_attr(test, assert_instr(vpermpd))]
23121pub fn _mm512_mask_permutexvar_pd(src: __m512d, k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23122 unsafe {
23123 let permute: f64x8 = _mm512_permutexvar_pd(idx, a).as_f64x8();
        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
23125 }
23126}
23127
23128/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23129///
23130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_pd&expand=4321)
23131#[inline]
23132#[target_feature(enable = "avx512f")]
23133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23134#[cfg_attr(test, assert_instr(vpermpd))]
23135pub fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23136 unsafe {
23137 let permute: f64x8 = _mm512_permutexvar_pd(idx, a).as_f64x8();
        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23139 }
23140}
23141
23142/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23143///
23144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_pd&expand=4319)
23145#[inline]
23146#[target_feature(enable = "avx512f,avx512vl")]
23147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23148#[cfg_attr(test, assert_instr(vpermpd))]
23149pub fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d {
    unsafe { transmute(vpermpd256(a.as_f64x4(), idx.as_i64x4())) }
23151}
23152
23153/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23154///
23155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_pd&expand=4317)
23156#[inline]
23157#[target_feature(enable = "avx512f,avx512vl")]
23158#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23159#[cfg_attr(test, assert_instr(vpermpd))]
23160pub fn _mm256_mask_permutexvar_pd(src: __m256d, k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23161 unsafe {
23162 let permute: f64x4 = _mm256_permutexvar_pd(idx, a).as_f64x4();
        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
23164 }
23165}
23166
23167/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23168///
23169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_pd&expand=4318)
23170#[inline]
23171#[target_feature(enable = "avx512f,avx512vl")]
23172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23173#[cfg_attr(test, assert_instr(vpermpd))]
23174pub fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23175 unsafe {
23176 let permute: f64x4 = _mm256_permutexvar_pd(idx, a).as_f64x4();
        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23178 }
23179}
23180
23181/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23182///
23183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi32&expand=4238)
23184#[inline]
23185#[target_feature(enable = "avx512f")]
23186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23187#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23188pub fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16())) }
23190}
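// Usage sketch (illustrative, not from the original source): each index
// selects from the 32-element table formed by `a` (indices 0..=15) and `b`
// (indices 16..=31); bit 4 is the a/b selector. The values are assumptions.
//
//     let a = _mm512_set1_epi32(1);
//     let b = _mm512_set1_epi32(2);
//     let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
//     // `r` interleaves the two sources: [1, 2, 1, 2, ..., 1, 2].
//     let r = _mm512_permutex2var_epi32(a, idx, b);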
23191
23192/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23193///
23194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi32&expand=4235)
23195#[inline]
23196#[target_feature(enable = "avx512f")]
23197#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23198#[cfg_attr(test, assert_instr(vpermt2d))]
23199pub fn _mm512_mask_permutex2var_epi32(
23200 a: __m512i,
23201 k: __mmask16,
23202 idx: __m512i,
23203 b: __m512i,
23204) -> __m512i {
23205 unsafe {
23206 let permute: i32x16 = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
        transmute(simd_select_bitmask(k, permute, a.as_i32x16()))
23208 }
23209}
23210
23211/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23212///
23213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi32&expand=4237)
23214#[inline]
23215#[target_feature(enable = "avx512f")]
23216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23217#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23218pub fn _mm512_maskz_permutex2var_epi32(
23219 k: __mmask16,
23220 a: __m512i,
23221 idx: __m512i,
23222 b: __m512i,
23223) -> __m512i {
23224 unsafe {
23225 let permute: i32x16 = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
23227 }
23228}
23229
23230/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23231///
23232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi32&expand=4236)
23233#[inline]
23234#[target_feature(enable = "avx512f")]
23235#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23236#[cfg_attr(test, assert_instr(vpermi2d))]
23237pub fn _mm512_mask2_permutex2var_epi32(
23238 a: __m512i,
23239 idx: __m512i,
23240 k: __mmask16,
23241 b: __m512i,
23242) -> __m512i {
23243 unsafe {
23244 let permute: i32x16 = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
        transmute(simd_select_bitmask(k, permute, idx.as_i32x16()))
23246 }
23247}
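// Sketch (illustrative, not from the original source): for a zero mask bit,
// the `mask` form keeps the lane from `a`, the `maskz` form zeroes it, and
// this `mask2` form keeps the lane from `idx` itself. With k = 0 the result is
// simply a copy of `idx`:
//
//     let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
//     // `r` equals `idx` because every mask bit is clear.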
23248
23249/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23250///
23251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi32&expand=4234)
23252#[inline]
23253#[target_feature(enable = "avx512f,avx512vl")]
23254#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23255#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23256pub fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8())) }
23258}
23259
23260/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23261///
23262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi32&expand=4231)
23263#[inline]
23264#[target_feature(enable = "avx512f,avx512vl")]
23265#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23266#[cfg_attr(test, assert_instr(vpermt2d))]
23267pub fn _mm256_mask_permutex2var_epi32(
23268 a: __m256i,
23269 k: __mmask8,
23270 idx: __m256i,
23271 b: __m256i,
23272) -> __m256i {
23273 unsafe {
23274 let permute: i32x8 = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
        transmute(simd_select_bitmask(k, permute, a.as_i32x8()))
23276 }
23277}
23278
23279/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23280///
23281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi32&expand=4233)
23282#[inline]
23283#[target_feature(enable = "avx512f,avx512vl")]
23284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23285#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23286pub fn _mm256_maskz_permutex2var_epi32(
23287 k: __mmask8,
23288 a: __m256i,
23289 idx: __m256i,
23290 b: __m256i,
23291) -> __m256i {
23292 unsafe {
23293 let permute: i32x8 = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
23295 }
23296}
23297
23298/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23299///
23300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi32&expand=4232)
23301#[inline]
23302#[target_feature(enable = "avx512f,avx512vl")]
23303#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23304#[cfg_attr(test, assert_instr(vpermi2d))]
23305pub fn _mm256_mask2_permutex2var_epi32(
23306 a: __m256i,
23307 idx: __m256i,
23308 k: __mmask8,
23309 b: __m256i,
23310) -> __m256i {
23311 unsafe {
23312 let permute: i32x8 = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
        transmute(simd_select_bitmask(k, permute, idx.as_i32x8()))
23314 }
23315}
23316
23317/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23318///
23319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi32&expand=4230)
23320#[inline]
23321#[target_feature(enable = "avx512f,avx512vl")]
23322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23323#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23324pub fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4())) }
23326}
23327
23328/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23329///
23330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi32&expand=4227)
23331#[inline]
23332#[target_feature(enable = "avx512f,avx512vl")]
23333#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23334#[cfg_attr(test, assert_instr(vpermt2d))]
23335pub fn _mm_mask_permutex2var_epi32(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
23336 unsafe {
23337 let permute: i32x4 = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
        transmute(simd_select_bitmask(k, permute, a.as_i32x4()))
23339 }
23340}
23341
23342/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23343///
23344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi32&expand=4229)
23345#[inline]
23346#[target_feature(enable = "avx512f,avx512vl")]
23347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23348#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23349pub fn _mm_maskz_permutex2var_epi32(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23350 unsafe {
23351 let permute: i32x4 = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
        transmute(simd_select_bitmask(k, permute, i32x4::ZERO))
23353 }
23354}
23355
23356/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23357///
23358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi32&expand=4228)
23359#[inline]
23360#[target_feature(enable = "avx512f,avx512vl")]
23361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23362#[cfg_attr(test, assert_instr(vpermi2d))]
23363pub fn _mm_mask2_permutex2var_epi32(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
23364 unsafe {
23365 let permute: i32x4 = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
        transmute(simd_select_bitmask(k, permute, idx.as_i32x4()))
23367 }
23368}
23369
23370/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23371///
23372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi64&expand=4250)
23373#[inline]
23374#[target_feature(enable = "avx512f")]
23375#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23376#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23377pub fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8())) }
23379}
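// Usage sketch (illustrative, not from the original source): with 64-bit lanes
// the combined table has 16 entries, so bit 3 of each index selects `b`. The
// values are assumptions.
//
//     let a = _mm512_set1_epi64(1);
//     let b = _mm512_set1_epi64(2);
//     let idx = _mm512_setr_epi64(0, 8, 1, 9, 2, 10, 3, 11);
//     // `r` interleaves the two sources: [1, 2, 1, 2, 1, 2, 1, 2].
//     let r = _mm512_permutex2var_epi64(a, idx, b);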
23380
23381/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23382///
23383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi64&expand=4247)
23384#[inline]
23385#[target_feature(enable = "avx512f")]
23386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23387#[cfg_attr(test, assert_instr(vpermt2q))]
23388pub fn _mm512_mask_permutex2var_epi64(
23389 a: __m512i,
23390 k: __mmask8,
23391 idx: __m512i,
23392 b: __m512i,
23393) -> __m512i {
23394 unsafe {
23395 let permute: i64x8 = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
        transmute(simd_select_bitmask(k, permute, a.as_i64x8()))
23397 }
23398}
23399
23400/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23401///
23402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi64&expand=4249)
23403#[inline]
23404#[target_feature(enable = "avx512f")]
23405#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23406#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23407pub fn _mm512_maskz_permutex2var_epi64(
23408 k: __mmask8,
23409 a: __m512i,
23410 idx: __m512i,
23411 b: __m512i,
23412) -> __m512i {
23413 unsafe {
23414 let permute: i64x8 = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
23416 }
23417}
23418
23419/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23420///
23421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi64&expand=4248)
23422#[inline]
23423#[target_feature(enable = "avx512f")]
23424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23425#[cfg_attr(test, assert_instr(vpermi2q))]
23426pub fn _mm512_mask2_permutex2var_epi64(
23427 a: __m512i,
23428 idx: __m512i,
23429 k: __mmask8,
23430 b: __m512i,
23431) -> __m512i {
23432 unsafe {
23433 let permute: i64x8 = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
        transmute(simd_select_bitmask(k, permute, idx.as_i64x8()))
23435 }
23436}
23437
23438/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23439///
23440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi64&expand=4246)
23441#[inline]
23442#[target_feature(enable = "avx512f,avx512vl")]
23443#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23444#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23445pub fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4())) }
23447}
23448
23449/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23450///
23451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi64&expand=4243)
23452#[inline]
23453#[target_feature(enable = "avx512f,avx512vl")]
23454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23455#[cfg_attr(test, assert_instr(vpermt2q))]
23456pub fn _mm256_mask_permutex2var_epi64(
23457 a: __m256i,
23458 k: __mmask8,
23459 idx: __m256i,
23460 b: __m256i,
23461) -> __m256i {
23462 unsafe {
23463 let permute: i64x4 = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
        transmute(simd_select_bitmask(k, permute, a.as_i64x4()))
23465 }
23466}
23467
23468/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23469///
23470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi64&expand=4245)
23471#[inline]
23472#[target_feature(enable = "avx512f,avx512vl")]
23473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23474#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23475pub fn _mm256_maskz_permutex2var_epi64(
23476 k: __mmask8,
23477 a: __m256i,
23478 idx: __m256i,
23479 b: __m256i,
23480) -> __m256i {
23481 unsafe {
23482 let permute: i64x4 = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
23484 }
23485}
23486
23487/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23488///
23489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi64&expand=4244)
23490#[inline]
23491#[target_feature(enable = "avx512f,avx512vl")]
23492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23493#[cfg_attr(test, assert_instr(vpermi2q))]
23494pub fn _mm256_mask2_permutex2var_epi64(
23495 a: __m256i,
23496 idx: __m256i,
23497 k: __mmask8,
23498 b: __m256i,
23499) -> __m256i {
23500 unsafe {
23501 let permute: i64x4 = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
        transmute(simd_select_bitmask(k, permute, idx.as_i64x4()))
23503 }
23504}
23505
23506/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23507///
23508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi64&expand=4242)
23509#[inline]
23510#[target_feature(enable = "avx512f,avx512vl")]
23511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23512#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23513pub fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2())) }
23515}
23516
23517/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23518///
23519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi64&expand=4239)
23520#[inline]
23521#[target_feature(enable = "avx512f,avx512vl")]
23522#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23523#[cfg_attr(test, assert_instr(vpermt2q))]
23524pub fn _mm_mask_permutex2var_epi64(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
23525 unsafe {
23526 let permute: i64x2 = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
        transmute(simd_select_bitmask(k, permute, a.as_i64x2()))
23528 }
23529}
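
// Illustrative sketch (not part of the upstream sources): the writemask variant keeps the
// corresponding element of `a` wherever a bit of `k` is clear. Hypothetical example,
// assuming avx512f + avx512vl.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn sketch_mm_mask_permutex2var_epi64() -> __m128i {
    let a = _mm_set_epi64x(11, 10);
    let b = _mm_set_epi64x(31, 30);
    let idx = _mm_set_epi64x(0b10, 0b01); // the unmasked result would be [11, 30]
    // k = 0b01: lane 0 takes the shuffled value (11), lane 1 keeps a[1] = 11 -> dst = [11, 11]
    _mm_mask_permutex2var_epi64(a, 0b01, idx, b)
}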
23530
23531/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23532///
23533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi64&expand=4241)
23534#[inline]
23535#[target_feature(enable = "avx512f,avx512vl")]
23536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23537#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23538pub fn _mm_maskz_permutex2var_epi64(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23539 unsafe {
23540 let permute: i64x2 = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
        transmute(simd_select_bitmask(k, permute, i64x2::ZERO))
23542 }
23543}
23544
23545/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23546///
23547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi64&expand=4240)
23548#[inline]
23549#[target_feature(enable = "avx512f,avx512vl")]
23550#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23551#[cfg_attr(test, assert_instr(vpermi2q))]
23552pub fn _mm_mask2_permutex2var_epi64(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
23553 unsafe {
23554 let permute: i64x2 = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
        transmute(simd_select_bitmask(k, permute, idx.as_i64x2()))
23556 }
23557}
23558
23559/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23560///
23561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_ps&expand=4286)
23562#[inline]
23563#[target_feature(enable = "avx512f")]
23564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23565#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23566pub fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
    unsafe { transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16())) }
23568}
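
// Illustrative sketch (not part of the upstream sources): hypothetical usage of
// `_mm512_permutex2var_ps`. Each `idx` element uses bits 3:0 to pick a lane and bit 4 to
// pick the source table (0 = `a`, 1 = `b`); the indices below interleave the two inputs
// lane by lane. Assumes avx512f support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_mm512_permutex2var_ps() -> __m512 {
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_set1_ps(2.0);
    // Even output lanes read a[j], odd output lanes read b[j] (16 + j selects from b).
    let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
    // dst = [1.0, 2.0, 1.0, 2.0, ...]
    _mm512_permutex2var_ps(a, idx, b)
}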
23569
23570/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23571///
23572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_ps&expand=4283)
23573#[inline]
23574#[target_feature(enable = "avx512f")]
23575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23576#[cfg_attr(test, assert_instr(vpermt2ps))]
23577pub fn _mm512_mask_permutex2var_ps(a: __m512, k: __mmask16, idx: __m512i, b: __m512) -> __m512 {
23578 unsafe {
23579 let permute: f32x16 = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, a.as_f32x16()))
23581 }
23582}
23583
23584/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23585///
23586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_ps&expand=4285)
23587#[inline]
23588#[target_feature(enable = "avx512f")]
23589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23590#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23591pub fn _mm512_maskz_permutex2var_ps(k: __mmask16, a: __m512, idx: __m512i, b: __m512) -> __m512 {
23592 unsafe {
23593 let permute: f32x16 = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
23595 }
23596}
23597
23598/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23599///
23600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_ps&expand=4284)
23601#[inline]
23602#[target_feature(enable = "avx512f")]
23603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23604#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23605pub fn _mm512_mask2_permutex2var_ps(a: __m512, idx: __m512i, k: __mmask16, b: __m512) -> __m512 {
23606 unsafe {
23607 let permute: f32x16 = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
23608 let idx: f32x16 = _mm512_castsi512_ps(idx).as_f32x16();
        transmute(simd_select_bitmask(k, permute, idx))
23610 }
23611}
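
// Illustrative sketch (not part of the upstream sources): what distinguishes the `mask2`
// form is that lanes whose bit in `k` is clear keep the raw bits of the corresponding `idx`
// element, reinterpreted as f32. Hypothetical example, assuming avx512f support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_mm512_mask2_permutex2var_ps() -> __m512 {
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_set1_ps(2.0);
    let idx = _mm512_set1_epi32(16); // every lane would select b[0] = 2.0
    // k = 0: nothing is written, so every lane holds f32::from_bits(16) (a tiny subnormal);
    // with k = !0 every lane would instead be 2.0.
    _mm512_mask2_permutex2var_ps(a, idx, 0, b)
}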
23612
23613/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23614///
23615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_ps&expand=4282)
23616#[inline]
23617#[target_feature(enable = "avx512f,avx512vl")]
23618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23619#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23620pub fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 {
    unsafe { transmute(vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8())) }
23622}
23623
23624/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23625///
23626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_ps&expand=4279)
23627#[inline]
23628#[target_feature(enable = "avx512f,avx512vl")]
23629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23630#[cfg_attr(test, assert_instr(vpermt2ps))]
23631pub fn _mm256_mask_permutex2var_ps(a: __m256, k: __mmask8, idx: __m256i, b: __m256) -> __m256 {
23632 unsafe {
23633 let permute: f32x8 = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, a.as_f32x8()))
23635 }
23636}
23637
23638/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23639///
23640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_ps&expand=4281)
23641#[inline]
23642#[target_feature(enable = "avx512f,avx512vl")]
23643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23644#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23645pub fn _mm256_maskz_permutex2var_ps(k: __mmask8, a: __m256, idx: __m256i, b: __m256) -> __m256 {
23646 unsafe {
23647 let permute: f32x8 = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23649 }
23650}
23651
23652/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23653///
23654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_ps&expand=4280)
23655#[inline]
23656#[target_feature(enable = "avx512f,avx512vl")]
23657#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23658#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23659pub fn _mm256_mask2_permutex2var_ps(a: __m256, idx: __m256i, k: __mmask8, b: __m256) -> __m256 {
23660 unsafe {
23661 let permute: f32x8 = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
23662 let idx: f32x8 = _mm256_castsi256_ps(idx).as_f32x8();
        transmute(simd_select_bitmask(k, permute, idx))
23664 }
23665}
23666
23667/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23668///
23669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_ps&expand=4278)
23670#[inline]
23671#[target_feature(enable = "avx512f,avx512vl")]
23672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23673#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23674pub fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 {
    unsafe { transmute(vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4())) }
23676}
23677
23678/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23679///
23680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_ps&expand=4275)
23681#[inline]
23682#[target_feature(enable = "avx512f,avx512vl")]
23683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23684#[cfg_attr(test, assert_instr(vpermt2ps))]
23685pub fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 {
23686 unsafe {
23687 let permute: f32x4 = _mm_permutex2var_ps(a, idx, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, a.as_f32x4()))
23689 }
23690}
23691
23692/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23693///
23694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_ps&expand=4277)
23695#[inline]
23696#[target_feature(enable = "avx512f,avx512vl")]
23697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23698#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23699pub fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 {
23700 unsafe {
23701 let permute: f32x4 = _mm_permutex2var_ps(a, idx, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
23703 }
23704}
23705
23706/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23707///
23708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_ps&expand=4276)
23709#[inline]
23710#[target_feature(enable = "avx512f,avx512vl")]
23711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23712#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23713pub fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 {
23714 unsafe {
23715 let permute: f32x4 = _mm_permutex2var_ps(a, idx, b).as_f32x4();
23716 let idx: f32x4 = _mm_castsi128_ps(idx).as_f32x4();
        transmute(simd_select_bitmask(k, permute, idx))
23718 }
23719}
23720
23721/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23722///
23723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_pd&expand=4274)
23724#[inline]
23725#[target_feature(enable = "avx512f")]
23726#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23727#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23728pub fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
    unsafe { transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8())) }
23730}
23731
23732/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23733///
23734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_pd&expand=4271)
23735#[inline]
23736#[target_feature(enable = "avx512f")]
23737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23738#[cfg_attr(test, assert_instr(vpermt2pd))]
23739pub fn _mm512_mask_permutex2var_pd(a: __m512d, k: __mmask8, idx: __m512i, b: __m512d) -> __m512d {
23740 unsafe {
23741 let permute: f64x8 = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, a.as_f64x8()))
23743 }
23744}
23745
23746/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23747///
23748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_pd&expand=4273)
23749#[inline]
23750#[target_feature(enable = "avx512f")]
23751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23752#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23753pub fn _mm512_maskz_permutex2var_pd(k: __mmask8, a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
23754 unsafe {
23755 let permute: f64x8 = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23757 }
23758}
23759
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23761///
23762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_pd&expand=4272)
23763#[inline]
23764#[target_feature(enable = "avx512f")]
23765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23766#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23767pub fn _mm512_mask2_permutex2var_pd(a: __m512d, idx: __m512i, k: __mmask8, b: __m512d) -> __m512d {
23768 unsafe {
23769 let permute: f64x8 = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
23770 let idx: f64x8 = _mm512_castsi512_pd(idx).as_f64x8();
        transmute(simd_select_bitmask(k, permute, idx))
23772 }
23773}
23774
23775/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23776///
23777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_pd&expand=4270)
23778#[inline]
23779#[target_feature(enable = "avx512f,avx512vl")]
23780#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23781#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23782pub fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
    unsafe { transmute(vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4())) }
23784}
23785
23786/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23787///
23788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_pd&expand=4267)
23789#[inline]
23790#[target_feature(enable = "avx512f,avx512vl")]
23791#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23792#[cfg_attr(test, assert_instr(vpermt2pd))]
23793pub fn _mm256_mask_permutex2var_pd(a: __m256d, k: __mmask8, idx: __m256i, b: __m256d) -> __m256d {
23794 unsafe {
23795 let permute: f64x4 = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, a.as_f64x4()))
23797 }
23798}
23799
23800/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23801///
23802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_pd&expand=4269)
23803#[inline]
23804#[target_feature(enable = "avx512f,avx512vl")]
23805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23806#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23807pub fn _mm256_maskz_permutex2var_pd(k: __mmask8, a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
23808 unsafe {
23809 let permute: f64x4 = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23811 }
23812}
23813
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23815///
23816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_pd&expand=4268)
23817#[inline]
23818#[target_feature(enable = "avx512f,avx512vl")]
23819#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23820#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23821pub fn _mm256_mask2_permutex2var_pd(a: __m256d, idx: __m256i, k: __mmask8, b: __m256d) -> __m256d {
23822 unsafe {
23823 let permute: f64x4 = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
23824 let idx: f64x4 = _mm256_castsi256_pd(idx).as_f64x4();
        transmute(simd_select_bitmask(k, permute, idx))
23826 }
23827}
23828
23829/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23830///
23831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_pd&expand=4266)
23832#[inline]
23833#[target_feature(enable = "avx512f,avx512vl")]
23834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23835#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23836pub fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
    unsafe { transmute(vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2())) }
23838}
23839
23840/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23841///
23842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_pd&expand=4263)
23843#[inline]
23844#[target_feature(enable = "avx512f,avx512vl")]
23845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23846#[cfg_attr(test, assert_instr(vpermt2pd))]
23847pub fn _mm_mask_permutex2var_pd(a: __m128d, k: __mmask8, idx: __m128i, b: __m128d) -> __m128d {
23848 unsafe {
23849 let permute: f64x2 = _mm_permutex2var_pd(a, idx, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, a.as_f64x2()))
23851 }
23852}
23853
23854/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23855///
23856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_pd&expand=4265)
23857#[inline]
23858#[target_feature(enable = "avx512f,avx512vl")]
23859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23860#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23861pub fn _mm_maskz_permutex2var_pd(k: __mmask8, a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
23862 unsafe {
23863 let permute: f64x2 = _mm_permutex2var_pd(a, idx, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
23865 }
23866}
23867
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23869///
23870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_pd&expand=4264)
23871#[inline]
23872#[target_feature(enable = "avx512f,avx512vl")]
23873#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23874#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23875pub fn _mm_mask2_permutex2var_pd(a: __m128d, idx: __m128i, k: __mmask8, b: __m128d) -> __m128d {
23876 unsafe {
23877 let permute: f64x2 = _mm_permutex2var_pd(a, idx, b).as_f64x2();
23878 let idx: f64x2 = _mm_castsi128_pd(idx).as_f64x2();
        transmute(simd_select_bitmask(k, permute, idx))
23880 }
23881}
23882
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst.
23884///
23885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi32&expand=5150)
23886#[inline]
23887#[target_feature(enable = "avx512f")]
23888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23889#[cfg_attr(test, assert_instr(vshufps, MASK = 9))] //should be vpshufd
23890#[rustc_legacy_const_generics(1)]
23891pub fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
23892 unsafe {
23893 static_assert_uimm_bits!(MASK, 8);
23894 let r: i32x16 = simd_shuffle!(
23895 a.as_i32x16(),
23896 a.as_i32x16(),
23897 [
23898 MASK as u32 & 0b11,
23899 (MASK as u32 >> 2) & 0b11,
23900 (MASK as u32 >> 4) & 0b11,
23901 (MASK as u32 >> 6) & 0b11,
23902 (MASK as u32 & 0b11) + 4,
23903 ((MASK as u32 >> 2) & 0b11) + 4,
23904 ((MASK as u32 >> 4) & 0b11) + 4,
23905 ((MASK as u32 >> 6) & 0b11) + 4,
23906 (MASK as u32 & 0b11) + 8,
23907 ((MASK as u32 >> 2) & 0b11) + 8,
23908 ((MASK as u32 >> 4) & 0b11) + 8,
23909 ((MASK as u32 >> 6) & 0b11) + 8,
23910 (MASK as u32 & 0b11) + 12,
23911 ((MASK as u32 >> 2) & 0b11) + 12,
23912 ((MASK as u32 >> 4) & 0b11) + 12,
23913 ((MASK as u32 >> 6) & 0b11) + 12,
23914 ],
23915 );
23916 transmute(r)
23917 }
23918}
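
// Illustrative sketch (not part of the upstream sources): hypothetical usage of
// `_mm512_shuffle_epi32`. The 8-bit control assigns, per 128-bit lane, two selector bits to
// each of the four output dwords; `0b00_01_10_11` reverses every lane. Assumes avx512f.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_mm512_shuffle_epi32() -> __m512i {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    // dst = [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]
    _mm512_shuffle_epi32::<0b00_01_10_11>(a)
}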
23919
23920/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23921///
23922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi32&expand=5148)
23923#[inline]
23924#[target_feature(enable = "avx512f")]
23925#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23926#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23927#[rustc_legacy_const_generics(3)]
23928pub fn _mm512_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23929 src: __m512i,
23930 k: __mmask16,
23931 a: __m512i,
23932) -> __m512i {
23933 unsafe {
23934 static_assert_uimm_bits!(MASK, 8);
23935 let r: __m512i = _mm512_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
23937 }
23938}
23939
23940/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23941///
23942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi32&expand=5149)
23943#[inline]
23944#[target_feature(enable = "avx512f")]
23945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23946#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23947#[rustc_legacy_const_generics(2)]
23948pub fn _mm512_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask16, a: __m512i) -> __m512i {
23949 unsafe {
23950 static_assert_uimm_bits!(MASK, 8);
23951 let r: __m512i = _mm512_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
23953 }
23954}
23955
23956/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23957///
23958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi32&expand=5145)
23959#[inline]
23960#[target_feature(enable = "avx512f,avx512vl")]
23961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23962#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23963#[rustc_legacy_const_generics(3)]
23964pub fn _mm256_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23965 src: __m256i,
23966 k: __mmask8,
23967 a: __m256i,
23968) -> __m256i {
23969 unsafe {
23970 static_assert_uimm_bits!(MASK, 8);
23971 let r: __m256i = _mm256_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
23973 }
23974}
23975
23976/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23977///
23978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi32&expand=5146)
23979#[inline]
23980#[target_feature(enable = "avx512f,avx512vl")]
23981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23982#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23983#[rustc_legacy_const_generics(2)]
23984pub fn _mm256_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m256i) -> __m256i {
23985 unsafe {
23986 static_assert_uimm_bits!(MASK, 8);
23987 let r: __m256i = _mm256_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
23989 }
23990}
23991
23992/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23993///
23994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi32&expand=5142)
23995#[inline]
23996#[target_feature(enable = "avx512f,avx512vl")]
23997#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23998#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23999#[rustc_legacy_const_generics(3)]
24000pub fn _mm_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24001 src: __m128i,
24002 k: __mmask8,
24003 a: __m128i,
24004) -> __m128i {
24005 unsafe {
24006 static_assert_uimm_bits!(MASK, 8);
24007 let r: __m128i = _mm_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
24009 }
24010}
24011
24012/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24013///
24014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi32&expand=5143)
24015#[inline]
24016#[target_feature(enable = "avx512f,avx512vl")]
24017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24018#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24019#[rustc_legacy_const_generics(2)]
24020pub fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m128i) -> __m128i {
24021 unsafe {
24022 static_assert_uimm_bits!(MASK, 8);
24023 let r: __m128i = _mm_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
24025 }
24026}
24027
24028/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
24029///
24030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_ps&expand=5203)
24031#[inline]
24032#[target_feature(enable = "avx512f")]
24033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24034#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24035#[rustc_legacy_const_generics(2)]
24036pub fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
24037 unsafe {
24038 static_assert_uimm_bits!(MASK, 8);
24039 simd_shuffle!(
24040 a,
24041 b,
24042 [
24043 MASK as u32 & 0b11,
24044 (MASK as u32 >> 2) & 0b11,
24045 ((MASK as u32 >> 4) & 0b11) + 16,
24046 ((MASK as u32 >> 6) & 0b11) + 16,
24047 (MASK as u32 & 0b11) + 4,
24048 ((MASK as u32 >> 2) & 0b11) + 4,
24049 ((MASK as u32 >> 4) & 0b11) + 20,
24050 ((MASK as u32 >> 6) & 0b11) + 20,
24051 (MASK as u32 & 0b11) + 8,
24052 ((MASK as u32 >> 2) & 0b11) + 8,
24053 ((MASK as u32 >> 4) & 0b11) + 24,
24054 ((MASK as u32 >> 6) & 0b11) + 24,
24055 (MASK as u32 & 0b11) + 12,
24056 ((MASK as u32 >> 2) & 0b11) + 12,
24057 ((MASK as u32 >> 4) & 0b11) + 28,
24058 ((MASK as u32 >> 6) & 0b11) + 28,
24059 ],
24060 )
24061 }
24062}
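
// Illustrative sketch (not part of the upstream sources): hypothetical usage of
// `_mm512_shuffle_ps`. Within each 128-bit lane the two low results are selected from `a`
// and the two high results from `b`, two control bits per result. Assumes avx512f.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_mm512_shuffle_ps() -> __m512 {
    let a = _mm512_setr_ps(
        0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
    );
    let b = _mm512_set1_ps(-1.0);
    // Control 0b01_00_11_10: each lane becomes [a[2], a[3], b[0], b[1]] (lane-relative),
    // so the first lane of dst is [2.0, 3.0, -1.0, -1.0].
    _mm512_shuffle_ps::<0b01_00_11_10>(a, b)
}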
24063
24064/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24065///
24066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_ps&expand=5201)
24067#[inline]
24068#[target_feature(enable = "avx512f")]
24069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24070#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24071#[rustc_legacy_const_generics(4)]
24072pub fn _mm512_mask_shuffle_ps<const MASK: i32>(
24073 src: __m512,
24074 k: __mmask16,
24075 a: __m512,
24076 b: __m512,
24077) -> __m512 {
24078 unsafe {
24079 static_assert_uimm_bits!(MASK, 8);
24080 let r: __m512 = _mm512_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
24082 }
24083}
24084
24085/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24086///
24087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_ps&expand=5202)
24088#[inline]
24089#[target_feature(enable = "avx512f")]
24090#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24091#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24092#[rustc_legacy_const_generics(3)]
24093pub fn _mm512_maskz_shuffle_ps<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
24094 unsafe {
24095 static_assert_uimm_bits!(MASK, 8);
24096 let r: __m512 = _mm512_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
24098 }
24099}
24100
24101/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24102///
24103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_ps&expand=5198)
24104#[inline]
24105#[target_feature(enable = "avx512f,avx512vl")]
24106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24107#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24108#[rustc_legacy_const_generics(4)]
24109pub fn _mm256_mask_shuffle_ps<const MASK: i32>(
24110 src: __m256,
24111 k: __mmask8,
24112 a: __m256,
24113 b: __m256,
24114) -> __m256 {
24115 unsafe {
24116 static_assert_uimm_bits!(MASK, 8);
24117 let r: __m256 = _mm256_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
24119 }
24120}
24121
24122/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24123///
24124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_ps&expand=5199)
24125#[inline]
24126#[target_feature(enable = "avx512f,avx512vl")]
24127#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24128#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24129#[rustc_legacy_const_generics(3)]
24130pub fn _mm256_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24131 unsafe {
24132 static_assert_uimm_bits!(MASK, 8);
24133 let r: __m256 = _mm256_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
24135 }
24136}
24137
24138/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24139///
24140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_ps&expand=5195)
24141#[inline]
24142#[target_feature(enable = "avx512f,avx512vl")]
24143#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24144#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24145#[rustc_legacy_const_generics(4)]
24146pub fn _mm_mask_shuffle_ps<const MASK: i32>(
24147 src: __m128,
24148 k: __mmask8,
24149 a: __m128,
24150 b: __m128,
24151) -> __m128 {
24152 unsafe {
24153 static_assert_uimm_bits!(MASK, 8);
24154 let r: __m128 = _mm_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24156 }
24157}
24158
24159/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24160///
24161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_ps&expand=5196)
24162#[inline]
24163#[target_feature(enable = "avx512f,avx512vl")]
24164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24165#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24166#[rustc_legacy_const_generics(3)]
24167pub fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
24168 unsafe {
24169 static_assert_uimm_bits!(MASK, 8);
24170 let r: __m128 = _mm_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24172 }
24173}
24174
24175/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst.
24176///
24177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_pd&expand=5192)
24178#[inline]
24179#[target_feature(enable = "avx512f")]
24180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24181#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24182#[rustc_legacy_const_generics(2)]
24183pub fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
24184 unsafe {
24185 static_assert_uimm_bits!(MASK, 8);
24186 simd_shuffle!(
24187 a,
24188 b,
24189 [
24190 MASK as u32 & 0b1,
24191 ((MASK as u32 >> 1) & 0b1) + 8,
24192 ((MASK as u32 >> 2) & 0b1) + 2,
24193 ((MASK as u32 >> 3) & 0b1) + 10,
24194 ((MASK as u32 >> 4) & 0b1) + 4,
24195 ((MASK as u32 >> 5) & 0b1) + 12,
24196 ((MASK as u32 >> 6) & 0b1) + 6,
24197 ((MASK as u32 >> 7) & 0b1) + 14,
24198 ],
24199 )
24200 }
24201}
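
// Illustrative sketch (not part of the upstream sources): hypothetical usage of
// `_mm512_shuffle_pd`. In every 128-bit lane the low result comes from `a` and the high
// result from `b`, with one control bit per result choosing that source's low or high
// element; control 0 therefore pairs up the lanes' low elements. Assumes avx512f.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_mm512_shuffle_pd() -> __m512d {
    let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
    let b = _mm512_setr_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
    // dst = [0.0, 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0]
    _mm512_shuffle_pd::<0>(a, b)
}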
24202
24203/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24204///
24205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_pd&expand=5190)
24206#[inline]
24207#[target_feature(enable = "avx512f")]
24208#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24209#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24210#[rustc_legacy_const_generics(4)]
24211pub fn _mm512_mask_shuffle_pd<const MASK: i32>(
24212 src: __m512d,
24213 k: __mmask8,
24214 a: __m512d,
24215 b: __m512d,
24216) -> __m512d {
24217 unsafe {
24218 static_assert_uimm_bits!(MASK, 8);
24219 let r: __m512d = _mm512_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
24221 }
24222}
24223
24224/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24225///
24226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_pd&expand=5191)
24227#[inline]
24228#[target_feature(enable = "avx512f")]
24229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24230#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24231#[rustc_legacy_const_generics(3)]
24232pub fn _mm512_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
24233 unsafe {
24234 static_assert_uimm_bits!(MASK, 8);
24235 let r: __m512d = _mm512_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
24237 }
24238}
24239
24240/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24241///
24242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_pd&expand=5187)
24243#[inline]
24244#[target_feature(enable = "avx512f,avx512vl")]
24245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24246#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24247#[rustc_legacy_const_generics(4)]
24248pub fn _mm256_mask_shuffle_pd<const MASK: i32>(
24249 src: __m256d,
24250 k: __mmask8,
24251 a: __m256d,
24252 b: __m256d,
24253) -> __m256d {
24254 unsafe {
24255 static_assert_uimm_bits!(MASK, 8);
24256 let r: __m256d = _mm256_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
24258 }
24259}
24260
24261/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24262///
24263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_pd&expand=5188)
24264#[inline]
24265#[target_feature(enable = "avx512f,avx512vl")]
24266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24267#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24268#[rustc_legacy_const_generics(3)]
24269pub fn _mm256_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
24270 unsafe {
24271 static_assert_uimm_bits!(MASK, 8);
24272 let r: __m256d = _mm256_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
24274 }
24275}
24276
24277/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24278///
24279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_pd&expand=5184)
24280#[inline]
24281#[target_feature(enable = "avx512f,avx512vl")]
24282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24283#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
24284#[rustc_legacy_const_generics(4)]
24285pub fn _mm_mask_shuffle_pd<const MASK: i32>(
24286 src: __m128d,
24287 k: __mmask8,
24288 a: __m128d,
24289 b: __m128d,
24290) -> __m128d {
24291 unsafe {
24292 static_assert_uimm_bits!(MASK, 8);
24293 let r: __m128d = _mm_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
24295 }
24296}
24297
24298/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24299///
24300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_pd&expand=5185)
24301#[inline]
24302#[target_feature(enable = "avx512f,avx512vl")]
24303#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24304#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
24305#[rustc_legacy_const_generics(3)]
24306pub fn _mm_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
24307 unsafe {
24308 static_assert_uimm_bits!(MASK, 8);
24309 let r: __m128d = _mm_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
24311 }
24312}
24313
24314/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
24315///
24316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i32x4&expand=5177)
24317#[inline]
24318#[target_feature(enable = "avx512f")]
24319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24320#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4
24321#[rustc_legacy_const_generics(2)]
24322pub fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
24323 unsafe {
24324 static_assert_uimm_bits!(MASK, 8);
24325 let a = a.as_i32x16();
24326 let b = b.as_i32x16();
24327 let r: i32x16 = simd_shuffle!(
24328 a,
24329 b,
24330 [
24331 (MASK as u32 & 0b11) * 4 + 0,
24332 (MASK as u32 & 0b11) * 4 + 1,
24333 (MASK as u32 & 0b11) * 4 + 2,
24334 (MASK as u32 & 0b11) * 4 + 3,
24335 ((MASK as u32 >> 2) & 0b11) * 4 + 0,
24336 ((MASK as u32 >> 2) & 0b11) * 4 + 1,
24337 ((MASK as u32 >> 2) & 0b11) * 4 + 2,
24338 ((MASK as u32 >> 2) & 0b11) * 4 + 3,
24339 ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
24340 ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
24341 ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
24342 ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
24343 ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
24344 ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
24345 ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
24346 ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
24347 ],
24348 );
24349 transmute(r)
24350 }
24351}
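
// Illustrative sketch (not part of the upstream sources): hypothetical usage of
// `_mm512_shuffle_i32x4`. The control moves whole 128-bit chunks: the two low output chunks
// come from `a` and the two high output chunks from `b`, two control bits each. With
// `0b11_10_01_00` the result is a's low 256 bits followed by b's high 256 bits. Assumes avx512f.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_mm512_shuffle_i32x4() -> __m512i {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let b = _mm512_set1_epi32(-1);
    // dst = [0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1]
    _mm512_shuffle_i32x4::<0b11_10_01_00>(a, b)
}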
24352
24353/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24354///
24355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i32x4&expand=5175)
24356#[inline]
24357#[target_feature(enable = "avx512f")]
24358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24359#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
24360#[rustc_legacy_const_generics(4)]
24361pub fn _mm512_mask_shuffle_i32x4<const MASK: i32>(
24362 src: __m512i,
24363 k: __mmask16,
24364 a: __m512i,
24365 b: __m512i,
24366) -> __m512i {
24367 unsafe {
24368 static_assert_uimm_bits!(MASK, 8);
24369 let r: __m512i = _mm512_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
24371 }
24372}
24373
24374/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24375///
24376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i32x4&expand=5176)
24377#[inline]
24378#[target_feature(enable = "avx512f")]
24379#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24380#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
24381#[rustc_legacy_const_generics(3)]
24382pub fn _mm512_maskz_shuffle_i32x4<const MASK: i32>(
24383 k: __mmask16,
24384 a: __m512i,
24385 b: __m512i,
24386) -> __m512i {
24387 unsafe {
24388 static_assert_uimm_bits!(MASK, 8);
24389 let r: __m512i = _mm512_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
24391 }
24392}
24393
24394/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
24395///
24396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i32x4&expand=5174)
24397#[inline]
24398#[target_feature(enable = "avx512f,avx512vl")]
24399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24400#[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] //should be vshufi32x4
24401#[rustc_legacy_const_generics(2)]
24402pub fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
24403 unsafe {
24404 static_assert_uimm_bits!(MASK, 8);
24405 let a: i32x8 = a.as_i32x8();
24406 let b: i32x8 = b.as_i32x8();
24407 let r: i32x8 = simd_shuffle!(
24408 a,
24409 b,
24410 [
24411 (MASK as u32 & 0b1) * 4 + 0,
24412 (MASK as u32 & 0b1) * 4 + 1,
24413 (MASK as u32 & 0b1) * 4 + 2,
24414 (MASK as u32 & 0b1) * 4 + 3,
24415 ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
24416 ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
24417 ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
24418 ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
24419 ],
24420 );
        transmute(r)
24422 }
24423}
24424
24425/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24426///
24427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i32x4&expand=5172)
24428#[inline]
24429#[target_feature(enable = "avx512f,avx512vl")]
24430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24431#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
24432#[rustc_legacy_const_generics(4)]
24433pub fn _mm256_mask_shuffle_i32x4<const MASK: i32>(
24434 src: __m256i,
24435 k: __mmask8,
24436 a: __m256i,
24437 b: __m256i,
24438) -> __m256i {
24439 unsafe {
24440 static_assert_uimm_bits!(MASK, 8);
24441 let r: __m256i = _mm256_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
24443 }
24444}
24445
24446/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24447///
24448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i32x4&expand=5173)
24449#[inline]
24450#[target_feature(enable = "avx512f,avx512vl")]
24451#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24452#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
24453#[rustc_legacy_const_generics(3)]
24454pub fn _mm256_maskz_shuffle_i32x4<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24455 unsafe {
24456 static_assert_uimm_bits!(MASK, 8);
24457 let r: __m256i = _mm256_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
24459 }
24460}
24461
24462/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
24463///
24464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i64x2&expand=5183)
24465#[inline]
24466#[target_feature(enable = "avx512f")]
24467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24468#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24469#[rustc_legacy_const_generics(2)]
24470pub fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
24471 unsafe {
24472 static_assert_uimm_bits!(MASK, 8);
24473 let a: i64x8 = a.as_i64x8();
24474 let b: i64x8 = b.as_i64x8();
24475 let r: i64x8 = simd_shuffle!(
24476 a,
24477 b,
24478 [
24479 (MASK as u32 & 0b11) * 2 + 0,
24480 (MASK as u32 & 0b11) * 2 + 1,
24481 ((MASK as u32 >> 2) & 0b11) * 2 + 0,
24482 ((MASK as u32 >> 2) & 0b11) * 2 + 1,
24483 ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
24484 ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
24485 ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
24486 ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
24487 ],
24488 );
        transmute(r)
24490 }
24491}
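
// Illustrative sketch (not part of the upstream sources): hypothetical usage of
// `_mm512_shuffle_i64x2`, the 64-bit counterpart of `_mm512_shuffle_i32x4` (each 128-bit
// chunk holds two quadwords). Control 0 broadcasts a's lowest chunk into the low half and
// b's lowest chunk into the high half. Assumes avx512f.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_mm512_shuffle_i64x2() -> __m512i {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
    // dst = [0, 1, 0, 1, 10, 11, 10, 11]
    _mm512_shuffle_i64x2::<0>(a, b)
}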
24492
24493/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24494///
24495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i64x2&expand=5181)
24496#[inline]
24497#[target_feature(enable = "avx512f")]
24498#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24499#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24500#[rustc_legacy_const_generics(4)]
24501pub fn _mm512_mask_shuffle_i64x2<const MASK: i32>(
24502 src: __m512i,
24503 k: __mmask8,
24504 a: __m512i,
24505 b: __m512i,
24506) -> __m512i {
24507 unsafe {
24508 static_assert_uimm_bits!(MASK, 8);
24509 let r: __m512i = _mm512_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
24511 }
24512}
24513
24514/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24515///
24516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i64x2&expand=5182)
24517#[inline]
24518#[target_feature(enable = "avx512f")]
24519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24520#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24521#[rustc_legacy_const_generics(3)]
24522pub fn _mm512_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
24523 unsafe {
24524 static_assert_uimm_bits!(MASK, 8);
24525 let r: __m512i = _mm512_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
24527 }
24528}
24529
24530/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
24531///
24532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i64x2&expand=5180)
24533#[inline]
24534#[target_feature(enable = "avx512f,avx512vl")]
24535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24536#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshufi64x2
24537#[rustc_legacy_const_generics(2)]
24538pub fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
24539 unsafe {
24540 static_assert_uimm_bits!(MASK, 8);
24541 let a: i64x4 = a.as_i64x4();
24542 let b: i64x4 = b.as_i64x4();
24543 let r: i64x4 = simd_shuffle!(
24544 a,
24545 b,
24546 [
24547 (MASK as u32 & 0b1) * 2 + 0,
24548 (MASK as u32 & 0b1) * 2 + 1,
24549 ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
24550 ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
24551 ],
24552 );
        transmute(r)
24554 }
24555}
24556
24557/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24558///
24559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i64x2&expand=5178)
24560#[inline]
24561#[target_feature(enable = "avx512f,avx512vl")]
24562#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24563#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
24564#[rustc_legacy_const_generics(4)]
24565pub fn _mm256_mask_shuffle_i64x2<const MASK: i32>(
24566 src: __m256i,
24567 k: __mmask8,
24568 a: __m256i,
24569 b: __m256i,
24570) -> __m256i {
24571 unsafe {
24572 static_assert_uimm_bits!(MASK, 8);
24573 let r: __m256i = _mm256_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
24575 }
24576}
24577
24578/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24579///
24580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i64x2&expand=5179)
24581#[inline]
24582#[target_feature(enable = "avx512f,avx512vl")]
24583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24584#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
24585#[rustc_legacy_const_generics(3)]
24586pub fn _mm256_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24587 unsafe {
24588 static_assert_uimm_bits!(MASK, 8);
24589 let r: __m256i = _mm256_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
24591 }
24592}
24593
24594/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24595///
24596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f32x4&expand=5165)
24597#[inline]
24598#[target_feature(enable = "avx512f")]
24599#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24600#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] //should be vshuff32x4, but generate vshuff64x2
24601#[rustc_legacy_const_generics(2)]
24602pub fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
24603 unsafe {
24604 static_assert_uimm_bits!(MASK, 8);
24605 let a = a.as_f32x16();
24606 let b = b.as_f32x16();
24607 let r: f32x16 = simd_shuffle!(
24608 a,
24609 b,
24610 [
24611 (MASK as u32 & 0b11) * 4 + 0,
24612 (MASK as u32 & 0b11) * 4 + 1,
24613 (MASK as u32 & 0b11) * 4 + 2,
24614 (MASK as u32 & 0b11) * 4 + 3,
24615 ((MASK as u32 >> 2) & 0b11) * 4 + 0,
24616 ((MASK as u32 >> 2) & 0b11) * 4 + 1,
24617 ((MASK as u32 >> 2) & 0b11) * 4 + 2,
24618 ((MASK as u32 >> 2) & 0b11) * 4 + 3,
24619 ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
24620 ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
24621 ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
24622 ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
24623 ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
24624 ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
24625 ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
24626 ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
24627 ],
24628 );
24629 transmute(r)
24630 }
24631}
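
// Worked example (illustrative note, not part of the upstream source): MASK is
// read as four 2-bit fields. Fields 0 and 1 pick 128-bit lanes of `a` for result
// lanes 0 and 1; fields 2 and 3 pick lanes of `b` for result lanes 2 and 3.
// With MASK = 0b00_00_10_11 the result is [a.lane3, a.lane2, b.lane0, b.lane0],
// i.e. elements [a[12..16], a[8..12], b[0..4], b[0..4]].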
24632
24633/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24634///
24635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f32x4&expand=5163)
24636#[inline]
24637#[target_feature(enable = "avx512f")]
24638#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24639#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
24640#[rustc_legacy_const_generics(4)]
24641pub fn _mm512_mask_shuffle_f32x4<const MASK: i32>(
24642 src: __m512,
24643 k: __mmask16,
24644 a: __m512,
24645 b: __m512,
24646) -> __m512 {
24647 unsafe {
24648 static_assert_uimm_bits!(MASK, 8);
24649 let r: __m512 = _mm512_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
24651 }
24652}
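
// Masking note (illustrative, not part of the upstream source): mask bit i
// controls result element i, so with k = 0x00FF the low eight f32 elements come
// from the shuffle of `a`/`b` and the high eight are copied through from `src`.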
24653
24654/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24655///
24656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f32x4&expand=5164)
24657#[inline]
24658#[target_feature(enable = "avx512f")]
24659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24660#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
24661#[rustc_legacy_const_generics(3)]
24662pub fn _mm512_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
24663 unsafe {
24664 static_assert_uimm_bits!(MASK, 8);
24665 let r: __m512 = _mm512_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
24667 }
24668}
24669
24670/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24671///
24672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f32x4&expand=5162)
24673#[inline]
24674#[target_feature(enable = "avx512f,avx512vl")]
24675#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24676#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff32x4
24677#[rustc_legacy_const_generics(2)]
24678pub fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
24679 unsafe {
24680 static_assert_uimm_bits!(MASK, 8);
24681 let a: f32x8 = a.as_f32x8();
24682 let b: f32x8 = b.as_f32x8();
24683 let r: f32x8 = simd_shuffle!(
24684 a,
24685 b,
24686 [
24687 (MASK as u32 & 0b1) * 4 + 0,
24688 (MASK as u32 & 0b1) * 4 + 1,
24689 (MASK as u32 & 0b1) * 4 + 2,
24690 (MASK as u32 & 0b1) * 4 + 3,
24691 ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
24692 ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
24693 ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
24694 ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
24695 ],
24696 );
        transmute(r)
24698 }
24699}
24700
24701/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24702///
24703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f32x4&expand=5160)
24704#[inline]
24705#[target_feature(enable = "avx512f,avx512vl")]
24706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24707#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
24708#[rustc_legacy_const_generics(4)]
24709pub fn _mm256_mask_shuffle_f32x4<const MASK: i32>(
24710 src: __m256,
24711 k: __mmask8,
24712 a: __m256,
24713 b: __m256,
24714) -> __m256 {
24715 unsafe {
24716 static_assert_uimm_bits!(MASK, 8);
24717 let r: __m256 = _mm256_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
24719 }
24720}
24721
24722/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24723///
24724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f32x4&expand=5161)
24725#[inline]
24726#[target_feature(enable = "avx512f,avx512vl")]
24727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24728#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
24729#[rustc_legacy_const_generics(3)]
24730pub fn _mm256_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24731 unsafe {
24732 static_assert_uimm_bits!(MASK, 8);
24733 let r: __m256 = _mm256_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
24735 }
24736}
24737
24738/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24739///
24740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f64x2&expand=5171)
24741#[inline]
24742#[target_feature(enable = "avx512f")]
24743#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24744#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24745#[rustc_legacy_const_generics(2)]
24746pub fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
24747 unsafe {
24748 static_assert_uimm_bits!(MASK, 8);
24749 let a: f64x8 = a.as_f64x8();
24750 let b: f64x8 = b.as_f64x8();
24751 let r: f64x8 = simd_shuffle!(
24752 a,
24753 b,
24754 [
24755 (MASK as u32 & 0b11) * 2 + 0,
24756 (MASK as u32 & 0b11) * 2 + 1,
24757 ((MASK as u32 >> 2) & 0b11) * 2 + 0,
24758 ((MASK as u32 >> 2) & 0b11) * 2 + 1,
24759 ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
24760 ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
24761 ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
24762 ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
24763 ],
24764 );
        transmute(r)
24766 }
24767}
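
// Worked example (illustrative note, not part of the upstream source): each 2-bit
// field of MASK selects a 128-bit lane (a pair of f64 elements); fields 0-1 index
// into `a`, fields 2-3 into `b`. With MASK = 0b10_11_11_11 (as in the test above)
// the result is [a.lane3, a.lane3, b.lane3, b.lane2].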
24768
24769/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24770///
24771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f64x2&expand=5169)
24772#[inline]
24773#[target_feature(enable = "avx512f")]
24774#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24775#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24776#[rustc_legacy_const_generics(4)]
24777pub fn _mm512_mask_shuffle_f64x2<const MASK: i32>(
24778 src: __m512d,
24779 k: __mmask8,
24780 a: __m512d,
24781 b: __m512d,
24782) -> __m512d {
24783 unsafe {
24784 static_assert_uimm_bits!(MASK, 8);
24785 let r: __m512d = _mm512_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
24787 }
24788}
24789
24790/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24791///
24792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f64x2&expand=5170)
24793#[inline]
24794#[target_feature(enable = "avx512f")]
24795#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24796#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24797#[rustc_legacy_const_generics(3)]
24798pub fn _mm512_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
24799 unsafe {
24800 static_assert_uimm_bits!(MASK, 8);
24801 let r: __m512d = _mm512_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
24803 }
24804}
24805
24806/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24807///
24808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f64x2&expand=5168)
24809#[inline]
24810#[target_feature(enable = "avx512f,avx512vl")]
24811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24812#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff64x2
24813#[rustc_legacy_const_generics(2)]
24814pub fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
24815 unsafe {
24816 static_assert_uimm_bits!(MASK, 8);
24817 let a: f64x4 = a.as_f64x4();
24818 let b: f64x4 = b.as_f64x4();
24819 let r: f64x4 = simd_shuffle!(
24820 a,
24821 b,
24822 [
24823 (MASK as u32 & 0b1) * 2 + 0,
24824 (MASK as u32 & 0b1) * 2 + 1,
24825 ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
24826 ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
24827 ],
24828 );
        transmute(r)
24830 }
24831}
24832
24833/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24834///
24835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f64x2&expand=5166)
24836#[inline]
24837#[target_feature(enable = "avx512f,avx512vl")]
24838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24839#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
24840#[rustc_legacy_const_generics(4)]
24841pub fn _mm256_mask_shuffle_f64x2<const MASK: i32>(
24842 src: __m256d,
24843 k: __mmask8,
24844 a: __m256d,
24845 b: __m256d,
24846) -> __m256d {
24847 unsafe {
24848 static_assert_uimm_bits!(MASK, 8);
24849 let r: __m256d = _mm256_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
24851 }
24852}
24853
24854/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24855///
24856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f64x2&expand=5167)
24857#[inline]
24858#[target_feature(enable = "avx512f,avx512vl")]
24859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24860#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
24861#[rustc_legacy_const_generics(3)]
24862pub fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
24863 unsafe {
24864 static_assert_uimm_bits!(MASK, 8);
24865 let r: __m256d = _mm256_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
24867 }
24868}
24869
24870/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24871///
24872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x4_ps&expand=2442)
24873#[inline]
24874#[target_feature(enable = "avx512f")]
24875#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24876#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
24877#[rustc_legacy_const_generics(1)]
24878pub fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
24879 unsafe {
24880 static_assert_uimm_bits!(IMM8, 2);
24881 match IMM8 & 0x3 {
24882 0 => simd_shuffle!(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
24883 1 => simd_shuffle!(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
24884 2 => simd_shuffle!(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
24885 _ => simd_shuffle!(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
24886 }
24887 }
24888}
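
// Illustrative note (not part of the upstream source): IMM8 simply selects one of
// the four 128-bit lanes of `a`. If `a` holds 0.0..=15.0 in element order, then
// _mm512_extractf32x4_ps::<2>(a) returns the lane containing [8.0, 9.0, 10.0, 11.0].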
24889
24890/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24891///
24892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x4_ps&expand=2443)
24893#[inline]
24894#[target_feature(enable = "avx512f")]
24895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24896#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
24897#[rustc_legacy_const_generics(3)]
24898pub fn _mm512_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m512) -> __m128 {
24899 unsafe {
24900 static_assert_uimm_bits!(IMM8, 2);
24901 let r: __m128 = _mm512_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24903 }
24904}
24905
24906/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24907///
24908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x4_ps&expand=2444)
24909#[inline]
24910#[target_feature(enable = "avx512f")]
24911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24912#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
24913#[rustc_legacy_const_generics(2)]
24914pub fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m128 {
24915 unsafe {
24916 static_assert_uimm_bits!(IMM8, 2);
24917 let r: __m128 = _mm512_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24919 }
24920}
24921
24922/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24923///
24924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf32x4_ps&expand=2439)
24925#[inline]
24926#[target_feature(enable = "avx512f,avx512vl")]
24927#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24928#[cfg_attr(
24929 test,
24930 assert_instr(vextract, IMM8 = 1) //should be vextractf32x4
24931)]
24932#[rustc_legacy_const_generics(1)]
24933pub fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
24934 unsafe {
24935 static_assert_uimm_bits!(IMM8, 1);
24936 match IMM8 & 0x1 {
24937 0 => simd_shuffle!(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
24938 _ => simd_shuffle!(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
24939 }
24940 }
24941}
24942
24943/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24944///
24945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf32x4_ps&expand=2440)
24946#[inline]
24947#[target_feature(enable = "avx512f,avx512vl")]
24948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24949#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
24950#[rustc_legacy_const_generics(3)]
24951pub fn _mm256_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m256) -> __m128 {
24952 unsafe {
24953 static_assert_uimm_bits!(IMM8, 1);
24954 let r: __m128 = _mm256_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24956 }
24957}
24958
24959/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24960///
24961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf32x4_ps&expand=2441)
24962#[inline]
24963#[target_feature(enable = "avx512f,avx512vl")]
24964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24965#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
24966#[rustc_legacy_const_generics(2)]
24967pub fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128 {
24968 unsafe {
24969 static_assert_uimm_bits!(IMM8, 1);
24970 let r: __m128 = _mm256_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24972 }
24973}
24974
24975/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst.
24976///
24977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x4_epi64&expand=2473)
24978#[inline]
24979#[target_feature(enable = "avx512f")]
24980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24981#[cfg_attr(
24982 test,
24983 assert_instr(vextractf64x4, IMM1 = 1) //should be vextracti64x4
24984)]
24985#[rustc_legacy_const_generics(1)]
24986pub fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
24987 unsafe {
24988 static_assert_uimm_bits!(IMM1, 1);
24989 match IMM1 {
24990 0 => simd_shuffle!(a, _mm512_setzero_si512(), [0, 1, 2, 3]),
24991 _ => simd_shuffle!(a, _mm512_setzero_si512(), [4, 5, 6, 7]),
24992 }
24993 }
24994}
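
// Illustrative note (not part of the upstream source): IMM1 selects a 256-bit
// half of `a`; 0 returns the low four 64-bit elements and 1 returns the high four.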
24995
24996/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24997///
24998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x4_epi64&expand=2474)
24999#[inline]
25000#[target_feature(enable = "avx512f")]
25001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25002#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
25003#[rustc_legacy_const_generics(3)]
25004pub fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
25005 src: __m256i,
25006 k: __mmask8,
25007 a: __m512i,
25008) -> __m256i {
25009 unsafe {
25010 static_assert_uimm_bits!(IMM1, 1);
25011 let r: __m256i = _mm512_extracti64x4_epi64::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
25013 }
25014}
25015
25016/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25017///
25018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x4_epi64&expand=2475)
25019#[inline]
25020#[target_feature(enable = "avx512f")]
25021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25022#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
25023#[rustc_legacy_const_generics(2)]
25024pub fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i {
25025 unsafe {
25026 static_assert_uimm_bits!(IMM1, 1);
25027 let r: __m256i = _mm512_extracti64x4_epi64::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
25029 }
25030}
25031
25032/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
25033///
25034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x4_pd&expand=2454)
25035#[inline]
25036#[target_feature(enable = "avx512f")]
25037#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25038#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
25039#[rustc_legacy_const_generics(1)]
25040pub fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
25041 unsafe {
25042 static_assert_uimm_bits!(IMM8, 1);
25043 match IMM8 & 0x1 {
25044 0 => simd_shuffle!(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
25045 _ => simd_shuffle!(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
25046 }
25047 }
25048}
25049
25050/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25051///
25052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x4_pd&expand=2455)
25053#[inline]
25054#[target_feature(enable = "avx512f")]
25055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25056#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
25057#[rustc_legacy_const_generics(3)]
25058pub fn _mm512_mask_extractf64x4_pd<const IMM8: i32>(
25059 src: __m256d,
25060 k: __mmask8,
25061 a: __m512d,
25062) -> __m256d {
25063 unsafe {
25064 static_assert_uimm_bits!(IMM8, 1);
25065 let r: __m256d = _mm512_extractf64x4_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
25067 }
25068}
25069
25070/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25071///
25072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x4_pd&expand=2456)
25073#[inline]
25074#[target_feature(enable = "avx512f")]
25075#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25076#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
25077#[rustc_legacy_const_generics(2)]
25078pub fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m256d {
25079 unsafe {
25080 static_assert_uimm_bits!(IMM8, 1);
25081 let r: __m256d = _mm512_extractf64x4_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
25083 }
25084}
25085
25086/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst.
25087///
25088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x4_epi32&expand=2461)
25089#[inline]
25090#[target_feature(enable = "avx512f")]
25091#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25092#[cfg_attr(
25093 test,
25094 assert_instr(vextractf32x4, IMM2 = 3) //should be vextracti32x4
25095)]
25096#[rustc_legacy_const_generics(1)]
25097pub fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
25098 unsafe {
25099 static_assert_uimm_bits!(IMM2, 2);
25100 let a: i32x16 = a.as_i32x16();
25101 let zero: i32x16 = i32x16::ZERO;
25102 let extract: i32x4 = match IMM2 {
25103 0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
25104 1 => simd_shuffle!(a, zero, [4, 5, 6, 7]),
25105 2 => simd_shuffle!(a, zero, [8, 9, 10, 11]),
25106 _ => simd_shuffle!(a, zero, [12, 13, 14, 15]),
25107 };
        transmute(extract)
25109 }
25110}
25111
25112/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25113///
25114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x4_epi32&expand=2462)
25115#[inline]
25116#[target_feature(enable = "avx512f")]
25117#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25118#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
25119#[rustc_legacy_const_generics(3)]
25120pub fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
25121 src: __m128i,
25122 k: __mmask8,
25123 a: __m512i,
25124) -> __m128i {
25125 unsafe {
25126 static_assert_uimm_bits!(IMM2, 2);
25127 let r: __m128i = _mm512_extracti32x4_epi32::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
25129 }
25130}
25131
25132/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25133///
25134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x4_epi32&expand=2463)
25135#[inline]
25136#[target_feature(enable = "avx512f")]
25137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25138#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
25139#[rustc_legacy_const_generics(2)]
25140pub fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i {
25141 unsafe {
25142 static_assert_uimm_bits!(IMM2, 2);
25143 let r: __m128i = _mm512_extracti32x4_epi32::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
25145 }
25146}
25147
25148/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst.
25149///
25150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti32x4_epi32&expand=2458)
25151#[inline]
25152#[target_feature(enable = "avx512f,avx512vl")]
25153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25154#[cfg_attr(
25155 test,
25156 assert_instr(vextract, IMM1 = 1) //should be vextracti32x4
25157)]
25158#[rustc_legacy_const_generics(1)]
25159pub fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
25160 unsafe {
25161 static_assert_uimm_bits!(IMM1, 1);
25162 let a: i32x8 = a.as_i32x8();
25163 let zero: i32x8 = i32x8::ZERO;
25164 let extract: i32x4 = match IMM1 {
25165 0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
25166 _ => simd_shuffle!(a, zero, [4, 5, 6, 7]),
25167 };
        transmute(extract)
25169 }
25170}
25171
25172/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25173///
25174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti32x4_epi32&expand=2459)
25175#[inline]
25176#[target_feature(enable = "avx512f,avx512vl")]
25177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25178#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
25179#[rustc_legacy_const_generics(3)]
25180pub fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
25181 src: __m128i,
25182 k: __mmask8,
25183 a: __m256i,
25184) -> __m128i {
25185 unsafe {
25186 static_assert_uimm_bits!(IMM1, 1);
25187 let r: __m128i = _mm256_extracti32x4_epi32::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
25189 }
25190}
25191
25192/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25193///
25194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti32x4_epi32&expand=2460)
25195#[inline]
25196#[target_feature(enable = "avx512f,avx512vl")]
25197#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25198#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
25199#[rustc_legacy_const_generics(2)]
25200pub fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i {
25201 unsafe {
25202 static_assert_uimm_bits!(IMM1, 1);
25203 let r: __m128i = _mm256_extracti32x4_epi32::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
25205 }
25206}
25207
25208/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
25209///
25210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_moveldup_ps&expand=3862)
25211#[inline]
25212#[target_feature(enable = "avx512f")]
25213#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25214#[cfg_attr(test, assert_instr(vmovsldup))]
25215pub fn _mm512_moveldup_ps(a: __m512) -> __m512 {
25216 unsafe {
25217 let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(r)
25219 }
25220}
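
// Illustrative note (not part of the upstream source): each even-indexed element
// is duplicated into the odd slot above it, so [a0, a1, a2, a3, ...] becomes
// [a0, a0, a2, a2, ...].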
25221
25222/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25223///
25224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_moveldup_ps&expand=3860)
25225#[inline]
25226#[target_feature(enable = "avx512f")]
25227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25228#[cfg_attr(test, assert_instr(vmovsldup))]
25229pub fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
25230 unsafe {
25231 let mov: f32x16 =
25232 simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
25234 }
25235}
25236
25237/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25238///
25239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_moveldup_ps&expand=3861)
25240#[inline]
25241#[target_feature(enable = "avx512f")]
25242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25243#[cfg_attr(test, assert_instr(vmovsldup))]
25244pub fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
25245 unsafe {
25246 let mov: f32x16 =
25247 simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
25249 }
25250}
25251
25252/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25253///
25254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_moveldup_ps&expand=3857)
25255#[inline]
25256#[target_feature(enable = "avx512f,avx512vl")]
25257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25258#[cfg_attr(test, assert_instr(vmovsldup))]
25259pub fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
25260 unsafe {
25261 let mov: __m256 = _mm256_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
25263 }
25264}
25265
25266/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25267///
25268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_moveldup_ps&expand=3858)
25269#[inline]
25270#[target_feature(enable = "avx512f,avx512vl")]
25271#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25272#[cfg_attr(test, assert_instr(vmovsldup))]
25273pub fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
25274 unsafe {
25275 let mov: __m256 = _mm256_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
25277 }
25278}
25279
25280/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25281///
25282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_moveldup_ps&expand=3854)
25283#[inline]
25284#[target_feature(enable = "avx512f,avx512vl")]
25285#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25286#[cfg_attr(test, assert_instr(vmovsldup))]
25287pub fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
25288 unsafe {
25289 let mov: __m128 = _mm_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
25291 }
25292}
25293
25294/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25295///
25296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_moveldup_ps&expand=3855)
25297#[inline]
25298#[target_feature(enable = "avx512f,avx512vl")]
25299#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25300#[cfg_attr(test, assert_instr(vmovsldup))]
25301pub fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
25302 unsafe {
25303 let mov: __m128 = _mm_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
25305 }
25306}
25307
25308/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
25309///
25310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movehdup_ps&expand=3852)
25311#[inline]
25312#[target_feature(enable = "avx512f")]
25313#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25314#[cfg_attr(test, assert_instr(vmovshdup))]
25315pub fn _mm512_movehdup_ps(a: __m512) -> __m512 {
25316 unsafe {
25317 let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(r)
25319 }
25320}
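
// Illustrative note (not part of the upstream source): the odd-indexed twin of
// _mm512_moveldup_ps; [a0, a1, a2, a3, ...] becomes [a1, a1, a3, a3, ...].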
25321
25322/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25323///
25324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movehdup_ps&expand=3850)
25325#[inline]
25326#[target_feature(enable = "avx512f")]
25327#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25328#[cfg_attr(test, assert_instr(vmovshdup))]
25329pub fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
25330 unsafe {
25331 let mov: f32x16 =
25332 simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
25334 }
25335}
25336
25337/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25338///
25339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movehdup_ps&expand=3851)
25340#[inline]
25341#[target_feature(enable = "avx512f")]
25342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25343#[cfg_attr(test, assert_instr(vmovshdup))]
25344pub fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
25345 unsafe {
25346 let mov: f32x16 =
25347 simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
25349 }
25350}
25351
25352/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25353///
25354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movehdup_ps&expand=3847)
25355#[inline]
25356#[target_feature(enable = "avx512f,avx512vl")]
25357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25358#[cfg_attr(test, assert_instr(vmovshdup))]
25359pub fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
25360 unsafe {
25361 let mov: __m256 = _mm256_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
25363 }
25364}
25365
25366/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25367///
25368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movehdup_ps&expand=3848)
25369#[inline]
25370#[target_feature(enable = "avx512f,avx512vl")]
25371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25372#[cfg_attr(test, assert_instr(vmovshdup))]
25373pub fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
25374 unsafe {
25375 let mov: __m256 = _mm256_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
25377 }
25378}
25379
25380/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25381///
25382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movehdup_ps&expand=3844)
25383#[inline]
25384#[target_feature(enable = "avx512f,avx512vl")]
25385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25386#[cfg_attr(test, assert_instr(vmovshdup))]
25387pub fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
25388 unsafe {
25389 let mov: __m128 = _mm_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
25391 }
25392}
25393
25394/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25395///
25396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movehdup_ps&expand=3845)
25397#[inline]
25398#[target_feature(enable = "avx512f,avx512vl")]
25399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25400#[cfg_attr(test, assert_instr(vmovshdup))]
25401pub fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
25402 unsafe {
25403 let mov: __m128 = _mm_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
25405 }
25406}
25407
25408/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst.
25409///
25410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movedup_pd&expand=3843)
25411#[inline]
25412#[target_feature(enable = "avx512f")]
25413#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25414#[cfg_attr(test, assert_instr(vmovddup))]
25415pub fn _mm512_movedup_pd(a: __m512d) -> __m512d {
25416 unsafe {
25417 let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(r)
25419 }
25420}
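
// Illustrative note (not part of the upstream source): duplicates the lower f64 of
// each 128-bit pair, so [a0, a1, a2, a3, a4, a5, a6, a7] becomes
// [a0, a0, a2, a2, a4, a4, a6, a6].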
25421
25422/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25423///
25424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movedup_pd&expand=3841)
25425#[inline]
25426#[target_feature(enable = "avx512f")]
25427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25428#[cfg_attr(test, assert_instr(vmovddup))]
25429pub fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
25430 unsafe {
25431 let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
25433 }
25434}
25435
25436/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25437///
25438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movedup_pd&expand=3842)
25439#[inline]
25440#[target_feature(enable = "avx512f")]
25441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25442#[cfg_attr(test, assert_instr(vmovddup))]
25443pub fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
25444 unsafe {
25445 let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
25447 }
25448}
25449
25450/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25451///
25452/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movedup_pd&expand=3838)
25453#[inline]
25454#[target_feature(enable = "avx512f,avx512vl")]
25455#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25456#[cfg_attr(test, assert_instr(vmovddup))]
25457pub fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
25458 unsafe {
25459 let mov: __m256d = _mm256_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x4(), src.as_f64x4()))
25461 }
25462}
25463
25464/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25465///
25466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movedup_pd&expand=3839)
25467#[inline]
25468#[target_feature(enable = "avx512f,avx512vl")]
25469#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25470#[cfg_attr(test, assert_instr(vmovddup))]
25471pub fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
25472 unsafe {
25473 let mov: __m256d = _mm256_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x4(), f64x4::ZERO))
25475 }
25476}
25477
25478/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25479///
25480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movedup_pd&expand=3835)
25481#[inline]
25482#[target_feature(enable = "avx512f,avx512vl")]
25483#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25484#[cfg_attr(test, assert_instr(vmovddup))]
25485pub fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
25486 unsafe {
25487 let mov: __m128d = _mm_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x2(), src.as_f64x2()))
25489 }
25490}
25491
25492/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25493///
25494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movedup_pd&expand=3836)
25495#[inline]
25496#[target_feature(enable = "avx512f,avx512vl")]
25497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25498#[cfg_attr(test, assert_instr(vmovddup))]
25499pub fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
25500 unsafe {
25501 let mov: __m128d = _mm_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x2(), f64x2::ZERO))
25503 }
25504}
25505
25506/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
25507///
25508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x4&expand=3174)
25509#[inline]
25510#[target_feature(enable = "avx512f")]
25511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25512#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] //should be vinserti32x4
25513#[rustc_legacy_const_generics(2)]
25514pub fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
25515 unsafe {
25516 static_assert_uimm_bits!(IMM8, 2);
25517 let a = a.as_i32x16();
25518 let b = _mm512_castsi128_si512(b).as_i32x16();
25519 let ret: i32x16 = match IMM8 & 0b11 {
25520 0 => {
25521 simd_shuffle!(
25522 a,
25523 b,
25524 [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
25525 )
25526 }
25527 1 => {
25528 simd_shuffle!(
25529 a,
25530 b,
25531 [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
25532 )
25533 }
25534 2 => {
25535 simd_shuffle!(
25536 a,
25537 b,
25538 [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
25539 )
25540 }
25541 _ => {
25542 simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
25543 }
25544 };
25545 transmute(ret)
25546 }
25547}
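
// Worked example (illustrative note, not part of the upstream source): IMM8 picks
// which 128-bit lane of `a` is replaced by `b`. With IMM8 = 2 the result keeps
// elements 0..=7 and 12..=15 of `a` and holds the four elements of `b` at
// positions 8..=11, matching the shuffle indices 16..=19 above.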
25548
25549/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25550///
25551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x4&expand=3175)
25552#[inline]
25553#[target_feature(enable = "avx512f")]
25554#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25555#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
25556#[rustc_legacy_const_generics(4)]
25557pub fn _mm512_mask_inserti32x4<const IMM8: i32>(
25558 src: __m512i,
25559 k: __mmask16,
25560 a: __m512i,
25561 b: __m128i,
25562) -> __m512i {
25563 unsafe {
25564 static_assert_uimm_bits!(IMM8, 2);
25565 let r: __m512i = _mm512_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
25567 }
25568}
25569
25570/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25571///
25572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x4&expand=3176)
25573#[inline]
25574#[target_feature(enable = "avx512f")]
25575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25576#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
25577#[rustc_legacy_const_generics(3)]
25578pub fn _mm512_maskz_inserti32x4<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m128i) -> __m512i {
25579 unsafe {
25580 static_assert_uimm_bits!(IMM8, 2);
25581 let r: __m512i = _mm512_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
25583 }
25584}
25585
25586/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
25587///
25588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti32x4&expand=3171)
25589#[inline]
25590#[target_feature(enable = "avx512f,avx512vl")]
25591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25592#[cfg_attr(
25593 test,
25594 assert_instr(vinsert, IMM8 = 1) //should be vinserti32x4
25595)]
25596#[rustc_legacy_const_generics(2)]
25597pub fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
25598 unsafe {
25599 static_assert_uimm_bits!(IMM8, 1);
25600 let a: i32x8 = a.as_i32x8();
25601 let b: i32x8 = _mm256_castsi128_si256(b).as_i32x8();
25602 let ret: i32x8 = match IMM8 & 0b1 {
25603 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25604 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25605 };
        transmute(ret)
25607 }
25608}
25609
25610/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25611///
25612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti32x4&expand=3172)
25613#[inline]
25614#[target_feature(enable = "avx512f,avx512vl")]
25615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25616#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
25617#[rustc_legacy_const_generics(4)]
25618pub fn _mm256_mask_inserti32x4<const IMM8: i32>(
25619 src: __m256i,
25620 k: __mmask8,
25621 a: __m256i,
25622 b: __m128i,
25623) -> __m256i {
25624 unsafe {
25625 static_assert_uimm_bits!(IMM8, 1);
25626 let r: __m256i = _mm256_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
25628 }
25629}
25630
25631/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25632///
25633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti32x4&expand=3173)
25634#[inline]
25635#[target_feature(enable = "avx512f,avx512vl")]
25636#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25637#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
25638#[rustc_legacy_const_generics(3)]
25639pub fn _mm256_maskz_inserti32x4<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m128i) -> __m256i {
25640 unsafe {
25641 static_assert_uimm_bits!(IMM8, 1);
25642 let r: __m256i = _mm256_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
25644 }
25645}
25646
25647/// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8.
25648///
25649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x4&expand=3186)
25650#[inline]
25651#[target_feature(enable = "avx512f")]
25652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25653#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] //should be vinserti64x4
25654#[rustc_legacy_const_generics(2)]
25655pub fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
25656 unsafe {
25657 static_assert_uimm_bits!(IMM8, 1);
25658 let b: __m512i = _mm512_castsi256_si512(b);
25659 match IMM8 & 0b1 {
25660 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25661 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25662 }
25663 }
25664}
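
// Illustrative note (not part of the upstream source): IMM8 selects the 256-bit
// half of `a` that is overwritten with `b`; 0 replaces the low four 64-bit
// elements and 1 replaces the high four.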
25665
25666/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25667///
25668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x4&expand=3187)
25669#[inline]
25670#[target_feature(enable = "avx512f")]
25671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25672#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
25673#[rustc_legacy_const_generics(4)]
25674pub fn _mm512_mask_inserti64x4<const IMM8: i32>(
25675 src: __m512i,
25676 k: __mmask8,
25677 a: __m512i,
25678 b: __m256i,
25679) -> __m512i {
25680 unsafe {
25681 static_assert_uimm_bits!(IMM8, 1);
25682 let r: __m512i = _mm512_inserti64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
25684 }
25685}
25686
25687/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25688///
25689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x4&expand=3188)
25690#[inline]
25691#[target_feature(enable = "avx512f")]
25692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25693#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
25694#[rustc_legacy_const_generics(3)]
25695pub fn _mm512_maskz_inserti64x4<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m256i) -> __m512i {
25696 unsafe {
25697 static_assert_uimm_bits!(IMM8, 1);
25698 let r: __m512i = _mm512_inserti64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
25700 }
25701}
25702
25703/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
25704///
25705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x4&expand=3155)
25706#[inline]
25707#[target_feature(enable = "avx512f")]
25708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25709#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25710#[rustc_legacy_const_generics(2)]
25711pub fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
25712 unsafe {
25713 static_assert_uimm_bits!(IMM8, 2);
25714 let b = _mm512_castps128_ps512(b);
25715 match IMM8 & 0b11 {
25716 0 => {
25717 simd_shuffle!(
25718 a,
25719 b,
25720 [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
25721 )
25722 }
25723 1 => {
25724 simd_shuffle!(
25725 a,
25726 b,
25727 [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
25728 )
25729 }
25730 2 => {
25731 simd_shuffle!(
25732 a,
25733 b,
25734 [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
25735 )
25736 }
25737 _ => {
25738 simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
25739 }
25740 }
25741 }
25742}
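
// Illustrative note (not part of the upstream source): the floating-point
// counterpart of _mm512_inserti32x4; IMM8 in 0..=3 selects which 128-bit lane of
// `a` is replaced by the four f32 elements of `b`.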
25743
25744/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25745///
25746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x4&expand=3156)
25747#[inline]
25748#[target_feature(enable = "avx512f")]
25749#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25750#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25751#[rustc_legacy_const_generics(4)]
25752pub fn _mm512_mask_insertf32x4<const IMM8: i32>(
25753 src: __m512,
25754 k: __mmask16,
25755 a: __m512,
25756 b: __m128,
25757) -> __m512 {
25758 unsafe {
25759 static_assert_uimm_bits!(IMM8, 2);
25760 let r: __m512 = _mm512_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
25762 }
25763}
25764
25765/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25766///
25767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x4&expand=3157)
25768#[inline]
25769#[target_feature(enable = "avx512f")]
25770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25771#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25772#[rustc_legacy_const_generics(3)]
25773pub fn _mm512_maskz_insertf32x4<const IMM8: i32>(k: __mmask16, a: __m512, b: __m128) -> __m512 {
25774 unsafe {
25775 static_assert_uimm_bits!(IMM8, 2);
25776 let r: __m512 = _mm512_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
25778 }
25779}
25780
25781/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
25782///
25783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf32x4&expand=3152)
25784#[inline]
25785#[target_feature(enable = "avx512f,avx512vl")]
25786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25787#[cfg_attr(
25788 test,
25789 assert_instr(vinsert, IMM8 = 1) //should be vinsertf32x4
25790)]
25791#[rustc_legacy_const_generics(2)]
25792pub fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
25793 unsafe {
25794 static_assert_uimm_bits!(IMM8, 1);
25795 let b: __m256 = _mm256_castps128_ps256(b);
25796 match IMM8 & 0b1 {
25797 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25798 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25799 }
25800 }
25801}
25802
25803/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25804///
25805/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf32x4&expand=3153)
25806#[inline]
25807#[target_feature(enable = "avx512f,avx512vl")]
25808#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25809#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
25810#[rustc_legacy_const_generics(4)]
25811pub fn _mm256_mask_insertf32x4<const IMM8: i32>(
25812 src: __m256,
25813 k: __mmask8,
25814 a: __m256,
25815 b: __m128,
25816) -> __m256 {
25817 unsafe {
25818 static_assert_uimm_bits!(IMM8, 1);
25819 let r: __m256 = _mm256_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
25821 }
25822}
25823
25824/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25825///
25826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf32x4&expand=3154)
25827#[inline]
25828#[target_feature(enable = "avx512f,avx512vl")]
25829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25830#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
25831#[rustc_legacy_const_generics(3)]
25832pub fn _mm256_maskz_insertf32x4<const IMM8: i32>(k: __mmask8, a: __m256, b: __m128) -> __m256 {
25833 unsafe {
25834 static_assert_uimm_bits!(IMM8, 1);
25835 let r: __m256 = _mm256_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
25837 }
25838}
25839
25840/// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8.
25841///
25842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x4&expand=3167)
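///
/// A minimal illustrative sketch (added commentary, not from Intel's documentation;
/// assumes the `avx512f` target feature is available at the call site):
///
/// ```ignore
/// let a = _mm512_set1_pd(1.0);
/// let b = _mm256_set1_pd(2.0);
/// // IMM8 = 0 replaces the lower 256-bit half of `a`, IMM8 = 1 the upper half.
/// let r = unsafe { _mm512_insertf64x4::<0>(a, b) };
/// ```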
25843#[inline]
25844#[target_feature(enable = "avx512f")]
25845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25846#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25847#[rustc_legacy_const_generics(2)]
25848pub fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
25849 unsafe {
25850 static_assert_uimm_bits!(IMM8, 1);
25851 let b: __m512d = _mm512_castpd256_pd512(b);
25852 match IMM8 & 0b1 {
25853 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25854 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25855 }
25856 }
25857}
25858
25859/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25860///
25861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x4&expand=3168)
25862#[inline]
25863#[target_feature(enable = "avx512f")]
25864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25865#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25866#[rustc_legacy_const_generics(4)]
25867pub fn _mm512_mask_insertf64x4<const IMM8: i32>(
25868 src: __m512d,
25869 k: __mmask8,
25870 a: __m512d,
25871 b: __m256d,
25872) -> __m512d {
25873 unsafe {
25874 static_assert_uimm_bits!(IMM8, 1);
25875 let r: __m512d = _mm512_insertf64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
25877 }
25878}
25879
25880/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25881///
25882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x4&expand=3169)
25883#[inline]
25884#[target_feature(enable = "avx512f")]
25885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25886#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25887#[rustc_legacy_const_generics(3)]
25888pub fn _mm512_maskz_insertf64x4<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m256d) -> __m512d {
25889 unsafe {
25890 static_assert_uimm_bits!(IMM8, 1);
25891 let r: __m512d = _mm512_insertf64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
25893 }
25894}
25895
25896/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
25897///
25898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi32&expand=6021)
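///
/// A minimal illustrative sketch (added commentary, not from Intel's documentation;
/// assumes the `avx512f` target feature is available at the call site):
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm512_set1_epi32(-1);
/// // Within each 128-bit lane the two high elements of `a` and `b` are
/// // interleaved, so the first lane of the result is [2, -1, 3, -1].
/// let r = unsafe { _mm512_unpackhi_epi32(a, b) };
/// ```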
25899#[inline]
25900#[target_feature(enable = "avx512f")]
25901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25902#[cfg_attr(test, assert_instr(vunpckhps))] //should be vpunpckhdq
25903pub fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
25904 unsafe {
25905 let a: i32x16 = a.as_i32x16();
25906 let b: i32x16 = b.as_i32x16();
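        // Indices 0..=15 select from `a` and 16..=31 from `b`; the `+ 4`, `+ 8` and
        // `+ 12` offsets repeat the high-element interleave in each 128-bit lane.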
25907 #[rustfmt::skip]
25908 let r: i32x16 = simd_shuffle!(
25909 a, b,
25910 [ 2, 18, 3, 19,
25911 2 + 4, 18 + 4, 3 + 4, 19 + 4,
25912 2 + 8, 18 + 8, 3 + 8, 19 + 8,
25913 2 + 12, 18 + 12, 3 + 12, 19 + 12],
25914 );
        transmute(r)
25916 }
25917}
25918
25919/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25920///
25921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi32&expand=6019)
25922#[inline]
25923#[target_feature(enable = "avx512f")]
25924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25925#[cfg_attr(test, assert_instr(vpunpckhdq))]
25926pub fn _mm512_mask_unpackhi_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
25927 unsafe {
25928 let unpackhi: i32x16 = _mm512_unpackhi_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
25930 }
25931}
25932
25933/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25934///
25935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi32&expand=6020)
25936#[inline]
25937#[target_feature(enable = "avx512f")]
25938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25939#[cfg_attr(test, assert_instr(vpunpckhdq))]
25940pub fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
25941 unsafe {
25942 let unpackhi: i32x16 = _mm512_unpackhi_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpackhi, i32x16::ZERO))
25944 }
25945}
25946
25947/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25948///
25949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi32&expand=6016)
25950#[inline]
25951#[target_feature(enable = "avx512f,avx512vl")]
25952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25953#[cfg_attr(test, assert_instr(vpunpckhdq))]
25954pub fn _mm256_mask_unpackhi_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25955 unsafe {
25956 let unpackhi: i32x8 = _mm256_unpackhi_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x8()))
25958 }
25959}
25960
25961/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25962///
25963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi32&expand=6017)
25964#[inline]
25965#[target_feature(enable = "avx512f,avx512vl")]
25966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25967#[cfg_attr(test, assert_instr(vpunpckhdq))]
25968pub fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25969 unsafe {
25970 let unpackhi: i32x8 = _mm256_unpackhi_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpackhi, i32x8::ZERO))
25972 }
25973}
25974
25975/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25976///
25977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi32&expand=6013)
25978#[inline]
25979#[target_feature(enable = "avx512f,avx512vl")]
25980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25981#[cfg_attr(test, assert_instr(vpunpckhdq))]
25982pub fn _mm_mask_unpackhi_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25983 unsafe {
25984 let unpackhi: i32x4 = _mm_unpackhi_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x4()))
25986 }
25987}
25988
25989/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25990///
25991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi32&expand=6014)
25992#[inline]
25993#[target_feature(enable = "avx512f,avx512vl")]
25994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25995#[cfg_attr(test, assert_instr(vpunpckhdq))]
25996pub fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25997 unsafe {
25998 let unpackhi: i32x4 = _mm_unpackhi_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpackhi, i32x4::ZERO))
26000 }
26001}
26002
26003/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
26004///
26005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi64&expand=6030)
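///
/// A minimal illustrative sketch (added commentary, not from Intel's documentation;
/// assumes the `avx512f` target feature is available at the call site):
///
/// ```ignore
/// let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
/// let b = _mm512_set1_epi64(-1);
/// // Each 128-bit lane contributes its high 64-bit element from `a` and then `b`,
/// // so the result is [1, -1, 3, -1, 5, -1, 7, -1].
/// let r = unsafe { _mm512_unpackhi_epi64(a, b) };
/// ```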
26006#[inline]
26007#[target_feature(enable = "avx512f")]
26008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26009#[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq
26010pub fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
26011 unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
26012}
26013
26014/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26015///
26016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi64&expand=6028)
26017#[inline]
26018#[target_feature(enable = "avx512f")]
26019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26020#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26021pub fn _mm512_mask_unpackhi_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26022 unsafe {
26023 let unpackhi: i64x8 = _mm512_unpackhi_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
26025 }
26026}
26027
26028/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26029///
26030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi64&expand=6029)
26031#[inline]
26032#[target_feature(enable = "avx512f")]
26033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26034#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26035pub fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26036 unsafe {
26037 let unpackhi: i64x8 = _mm512_unpackhi_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpackhi, i64x8::ZERO))
26039 }
26040}
26041
26042/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26043///
26044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi64&expand=6025)
26045#[inline]
26046#[target_feature(enable = "avx512f,avx512vl")]
26047#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26048#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26049pub fn _mm256_mask_unpackhi_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26050 unsafe {
26051 let unpackhi: i64x4 = _mm256_unpackhi_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x4()))
26053 }
26054}
26055
26056/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26057///
26058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi64&expand=6026)
26059#[inline]
26060#[target_feature(enable = "avx512f,avx512vl")]
26061#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26062#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26063pub fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26064 unsafe {
26065 let unpackhi: i64x4 = _mm256_unpackhi_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpackhi, i64x4::ZERO))
26067 }
26068}
26069
26070/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26071///
26072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi64&expand=6022)
26073#[inline]
26074#[target_feature(enable = "avx512f,avx512vl")]
26075#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26076#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26077pub fn _mm_mask_unpackhi_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26078 unsafe {
26079 let unpackhi: i64x2 = _mm_unpackhi_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x2()))
26081 }
26082}
26083
26084/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26085///
26086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi64&expand=6023)
26087#[inline]
26088#[target_feature(enable = "avx512f,avx512vl")]
26089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26090#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26091pub fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26092 unsafe {
26093 let unpackhi: i64x2 = _mm_unpackhi_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpackhi, i64x2::ZERO))
26095 }
26096}
26097
26098/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
26099///
26100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_ps&expand=6060)
26101#[inline]
26102#[target_feature(enable = "avx512f")]
26103#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26104#[cfg_attr(test, assert_instr(vunpckhps))]
26105pub fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
26106 unsafe {
26107 #[rustfmt::skip]
26108 simd_shuffle!(
26109 a, b,
26110 [ 2, 18, 3, 19,
26111 2 + 4, 18 + 4, 3 + 4, 19 + 4,
26112 2 + 8, 18 + 8, 3 + 8, 19 + 8,
26113 2 + 12, 18 + 12, 3 + 12, 19 + 12],
26114 )
26115 }
26116}
26117
26118/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26119///
26120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_ps&expand=6058)
26121#[inline]
26122#[target_feature(enable = "avx512f")]
26123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26124#[cfg_attr(test, assert_instr(vunpckhps))]
26125pub fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
26126 unsafe {
26127 let unpackhi: f32x16 = _mm512_unpackhi_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
26129 }
26130}
26131
26132/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26133///
26134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_ps&expand=6059)
26135#[inline]
26136#[target_feature(enable = "avx512f")]
26137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26138#[cfg_attr(test, assert_instr(vunpckhps))]
26139pub fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
26140 unsafe {
26141 let unpackhi: f32x16 = _mm512_unpackhi_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpackhi, f32x16::ZERO))
26143 }
26144}
26145
26146/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26147///
26148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_ps&expand=6055)
26149#[inline]
26150#[target_feature(enable = "avx512f,avx512vl")]
26151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26152#[cfg_attr(test, assert_instr(vunpckhps))]
26153pub fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
26154 unsafe {
26155 let unpackhi: f32x8 = _mm256_unpackhi_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x8()))
26157 }
26158}
26159
26160/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26161///
26162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_ps&expand=6056)
26163#[inline]
26164#[target_feature(enable = "avx512f,avx512vl")]
26165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26166#[cfg_attr(test, assert_instr(vunpckhps))]
26167pub fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
26168 unsafe {
26169 let unpackhi: f32x8 = _mm256_unpackhi_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpackhi, f32x8::ZERO))
26171 }
26172}
26173
26174/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26175///
26176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_ps&expand=6052)
26177#[inline]
26178#[target_feature(enable = "avx512f,avx512vl")]
26179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26180#[cfg_attr(test, assert_instr(vunpckhps))]
26181pub fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
26182 unsafe {
26183 let unpackhi: f32x4 = _mm_unpackhi_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x4()))
26185 }
26186}
26187
26188/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26189///
26190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_ps&expand=6053)
26191#[inline]
26192#[target_feature(enable = "avx512f,avx512vl")]
26193#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26194#[cfg_attr(test, assert_instr(vunpckhps))]
26195pub fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
26196 unsafe {
26197 let unpackhi: f32x4 = _mm_unpackhi_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpackhi, f32x4::ZERO))
26199 }
26200}
26201
26202/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
26203///
26204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_pd&expand=6048)
26205#[inline]
26206#[target_feature(enable = "avx512f")]
26207#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26208#[cfg_attr(test, assert_instr(vunpckhpd))]
26209pub fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
26210 unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
26211}
26212
26213/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26214///
26215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_pd&expand=6046)
26216#[inline]
26217#[target_feature(enable = "avx512f")]
26218#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26219#[cfg_attr(test, assert_instr(vunpckhpd))]
26220pub fn _mm512_mask_unpackhi_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26221 unsafe {
26222 let unpackhi: f64x8 = _mm512_unpackhi_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
26224 }
26225}
26226
26227/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26228///
26229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_pd&expand=6047)
26230#[inline]
26231#[target_feature(enable = "avx512f")]
26232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26233#[cfg_attr(test, assert_instr(vunpckhpd))]
26234pub fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26235 unsafe {
26236 let unpackhi: f64x8 = _mm512_unpackhi_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpackhi, f64x8::ZERO))
26238 }
26239}
26240
26241/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26242///
26243/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_pd&expand=6043)
26244#[inline]
26245#[target_feature(enable = "avx512f,avx512vl")]
26246#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26247#[cfg_attr(test, assert_instr(vunpckhpd))]
26248pub fn _mm256_mask_unpackhi_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26249 unsafe {
26250 let unpackhi: f64x4 = _mm256_unpackhi_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x4()))
26252 }
26253}
26254
26255/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26256///
26257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_pd&expand=6044)
26258#[inline]
26259#[target_feature(enable = "avx512f,avx512vl")]
26260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26261#[cfg_attr(test, assert_instr(vunpckhpd))]
26262pub fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26263 unsafe {
26264 let unpackhi: f64x4 = _mm256_unpackhi_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpackhi, f64x4::ZERO))
26266 }
26267}
26268
26269/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26270///
26271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_pd&expand=6040)
26272#[inline]
26273#[target_feature(enable = "avx512f,avx512vl")]
26274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26275#[cfg_attr(test, assert_instr(vunpckhpd))]
26276pub fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26277 unsafe {
26278 let unpackhi: f64x2 = _mm_unpackhi_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x2()))
26280 }
26281}
26282
26283/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26284///
26285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_pd&expand=6041)
26286#[inline]
26287#[target_feature(enable = "avx512f,avx512vl")]
26288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26289#[cfg_attr(test, assert_instr(vunpckhpd))]
26290pub fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26291 unsafe {
26292 let unpackhi: f64x2 = _mm_unpackhi_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpackhi, f64x2::ZERO))
26294 }
26295}
26296
26297/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
26298///
26299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi32&expand=6078)
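///
/// A minimal illustrative sketch (added commentary, not from Intel's documentation;
/// assumes the `avx512f` target feature is available at the call site):
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm512_set1_epi32(-1);
/// // Within each 128-bit lane the two low elements of `a` and `b` are
/// // interleaved, so the first lane of the result is [0, -1, 1, -1].
/// let r = unsafe { _mm512_unpacklo_epi32(a, b) };
/// ```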
26300#[inline]
26301#[target_feature(enable = "avx512f")]
26302#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26303#[cfg_attr(test, assert_instr(vunpcklps))] //should be vpunpckldq
26304pub fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
26305 unsafe {
26306 let a: i32x16 = a.as_i32x16();
26307 let b: i32x16 = b.as_i32x16();
26308 #[rustfmt::skip]
26309 let r: i32x16 = simd_shuffle!(
26310 a, b,
26311 [ 0, 16, 1, 17,
26312 0 + 4, 16 + 4, 1 + 4, 17 + 4,
26313 0 + 8, 16 + 8, 1 + 8, 17 + 8,
26314 0 + 12, 16 + 12, 1 + 12, 17 + 12],
26315 );
        transmute(r)
26317 }
26318}
26319
26320/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26321///
26322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi32&expand=6076)
26323#[inline]
26324#[target_feature(enable = "avx512f")]
26325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26326#[cfg_attr(test, assert_instr(vpunpckldq))]
26327pub fn _mm512_mask_unpacklo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26328 unsafe {
26329 let unpacklo: i32x16 = _mm512_unpacklo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
26331 }
26332}
26333
26334/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26335///
26336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi32&expand=6077)
26337#[inline]
26338#[target_feature(enable = "avx512f")]
26339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26340#[cfg_attr(test, assert_instr(vpunpckldq))]
26341pub fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26342 unsafe {
26343 let unpacklo: i32x16 = _mm512_unpacklo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpacklo, i32x16::ZERO))
26345 }
26346}
26347
26348/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26349///
26350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi32&expand=6073)
26351#[inline]
26352#[target_feature(enable = "avx512f,avx512vl")]
26353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26354#[cfg_attr(test, assert_instr(vpunpckldq))]
26355pub fn _mm256_mask_unpacklo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26356 unsafe {
26357 let unpacklo: i32x8 = _mm256_unpacklo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x8()))
26359 }
26360}
26361
26362/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26363///
26364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi32&expand=6074)
26365#[inline]
26366#[target_feature(enable = "avx512f,avx512vl")]
26367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26368#[cfg_attr(test, assert_instr(vpunpckldq))]
26369pub fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26370 unsafe {
26371 let unpacklo: i32x8 = _mm256_unpacklo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpacklo, i32x8::ZERO))
26373 }
26374}
26375
26376/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26377///
26378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi32&expand=6070)
26379#[inline]
26380#[target_feature(enable = "avx512f,avx512vl")]
26381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26382#[cfg_attr(test, assert_instr(vpunpckldq))]
26383pub fn _mm_mask_unpacklo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26384 unsafe {
26385 let unpacklo: i32x4 = _mm_unpacklo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x4()))
26387 }
26388}
26389
26390/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26391///
26392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi32&expand=6071)
26393#[inline]
26394#[target_feature(enable = "avx512f,avx512vl")]
26395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26396#[cfg_attr(test, assert_instr(vpunpckldq))]
26397pub fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26398 unsafe {
26399 let unpacklo: i32x4 = _mm_unpacklo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpacklo, i32x4::ZERO))
26401 }
26402}
26403
26404/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
26405///
26406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi64&expand=6087)
26407#[inline]
26408#[target_feature(enable = "avx512f")]
26409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26410#[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq
26411pub fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
26412 unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
26413}
26414
26415/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26416///
26417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi64&expand=6085)
26418#[inline]
26419#[target_feature(enable = "avx512f")]
26420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26421#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26422pub fn _mm512_mask_unpacklo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26423 unsafe {
26424 let unpacklo: i64x8 = _mm512_unpacklo_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8()))
26426 }
26427}
26428
26429/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26430///
26431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi64&expand=6086)
26432#[inline]
26433#[target_feature(enable = "avx512f")]
26434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26435#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26436pub fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26437 unsafe {
26438 let unpacklo: i64x8 = _mm512_unpacklo_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpacklo, i64x8::ZERO))
26440 }
26441}
26442
26443/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26444///
26445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi64&expand=6082)
26446#[inline]
26447#[target_feature(enable = "avx512f,avx512vl")]
26448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26449#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26450pub fn _mm256_mask_unpacklo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26451 unsafe {
26452 let unpacklo: i64x4 = _mm256_unpacklo_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x4()))
26454 }
26455}
26456
26457/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26458///
26459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi64&expand=6083)
26460#[inline]
26461#[target_feature(enable = "avx512f,avx512vl")]
26462#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26463#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26464pub fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26465 unsafe {
26466 let unpacklo: i64x4 = _mm256_unpacklo_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpacklo, i64x4::ZERO))
26468 }
26469}
26470
26471/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26472///
26473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi64&expand=6079)
26474#[inline]
26475#[target_feature(enable = "avx512f,avx512vl")]
26476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26477#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26478pub fn _mm_mask_unpacklo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26479 unsafe {
26480 let unpacklo: i64x2 = _mm_unpacklo_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x2()))
26482 }
26483}
26484
26485/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26486///
26487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi64&expand=6080)
26488#[inline]
26489#[target_feature(enable = "avx512f,avx512vl")]
26490#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26491#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26492pub fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26493 unsafe {
26494 let unpacklo: i64x2 = _mm_unpacklo_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpacklo, i64x2::ZERO))
26496 }
26497}
26498
26499/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
26500///
26501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_ps&expand=6117)
26502#[inline]
26503#[target_feature(enable = "avx512f")]
26504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26505#[cfg_attr(test, assert_instr(vunpcklps))]
26506pub fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
26507 unsafe {
26508 #[rustfmt::skip]
26509 simd_shuffle!(a, b,
26510 [ 0, 16, 1, 17,
26511 0 + 4, 16 + 4, 1 + 4, 17 + 4,
26512 0 + 8, 16 + 8, 1 + 8, 17 + 8,
26513 0 + 12, 16 + 12, 1 + 12, 17 + 12],
26514 )
26515 }
26516}
26517
26518/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26519///
26520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_ps&expand=6115)
26521#[inline]
26522#[target_feature(enable = "avx512f")]
26523#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26524#[cfg_attr(test, assert_instr(vunpcklps))]
26525pub fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
26526 unsafe {
26527 let unpacklo: f32x16 = _mm512_unpacklo_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16()))
26529 }
26530}
26531
26532/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26533///
26534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_ps&expand=6116)
26535#[inline]
26536#[target_feature(enable = "avx512f")]
26537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26538#[cfg_attr(test, assert_instr(vunpcklps))]
26539pub fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
26540 unsafe {
26541 let unpacklo: f32x16 = _mm512_unpacklo_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpacklo, f32x16::ZERO))
26543 }
26544}
26545
26546/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26547///
26548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_ps&expand=6112)
26549#[inline]
26550#[target_feature(enable = "avx512f,avx512vl")]
26551#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26552#[cfg_attr(test, assert_instr(vunpcklps))]
26553pub fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
26554 unsafe {
26555 let unpacklo: f32x8 = _mm256_unpacklo_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x8()))
26557 }
26558}
26559
26560/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26561///
26562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_ps&expand=6113)
26563#[inline]
26564#[target_feature(enable = "avx512f,avx512vl")]
26565#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26566#[cfg_attr(test, assert_instr(vunpcklps))]
26567pub fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
26568 unsafe {
26569 let unpacklo: f32x8 = _mm256_unpacklo_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpacklo, f32x8::ZERO))
26571 }
26572}
26573
26574/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26575///
26576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_ps&expand=6109)
26577#[inline]
26578#[target_feature(enable = "avx512f,avx512vl")]
26579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26580#[cfg_attr(test, assert_instr(vunpcklps))]
26581pub fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
26582 unsafe {
26583 let unpacklo: f32x4 = _mm_unpacklo_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x4()))
26585 }
26586}
26587
26588/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26589///
26590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_ps&expand=6110)
26591#[inline]
26592#[target_feature(enable = "avx512f,avx512vl")]
26593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26594#[cfg_attr(test, assert_instr(vunpcklps))]
26595pub fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
26596 unsafe {
26597 let unpacklo: f32x4 = _mm_unpacklo_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpacklo, f32x4::ZERO))
26599 }
26600}
26601
26602/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
26603///
26604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_pd&expand=6105)
26605#[inline]
26606#[target_feature(enable = "avx512f")]
26607#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26608#[cfg_attr(test, assert_instr(vunpcklpd))]
26609pub fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
26610 unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
26611}
26612
26613/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26614///
26615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_pd&expand=6103)
26616#[inline]
26617#[target_feature(enable = "avx512f")]
26618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26619#[cfg_attr(test, assert_instr(vunpcklpd))]
26620pub fn _mm512_mask_unpacklo_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26621 unsafe {
26622 let unpacklo: f64x8 = _mm512_unpacklo_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8()))
26624 }
26625}
26626
26627/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26628///
26629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_pd&expand=6104)
26630#[inline]
26631#[target_feature(enable = "avx512f")]
26632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26633#[cfg_attr(test, assert_instr(vunpcklpd))]
26634pub fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26635 unsafe {
26636 let unpacklo: f64x8 = _mm512_unpacklo_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpacklo, f64x8::ZERO))
26638 }
26639}
26640
26641/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26642///
26643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_pd&expand=6100)
26644#[inline]
26645#[target_feature(enable = "avx512f,avx512vl")]
26646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26647#[cfg_attr(test, assert_instr(vunpcklpd))]
26648pub fn _mm256_mask_unpacklo_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26649 unsafe {
26650 let unpacklo: f64x4 = _mm256_unpacklo_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x4()))
26652 }
26653}
26654
26655/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26656///
26657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_pd&expand=6101)
26658#[inline]
26659#[target_feature(enable = "avx512f,avx512vl")]
26660#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26661#[cfg_attr(test, assert_instr(vunpcklpd))]
26662pub fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26663 unsafe {
26664 let unpacklo: f64x4 = _mm256_unpacklo_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpacklo, f64x4::ZERO))
26666 }
26667}
26668
26669/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26670///
26671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_pd&expand=6097)
26672#[inline]
26673#[target_feature(enable = "avx512f,avx512vl")]
26674#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26675#[cfg_attr(test, assert_instr(vunpcklpd))]
26676pub fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26677 unsafe {
26678 let unpacklo: f64x2 = _mm_unpacklo_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x2()))
26680 }
26681}
26682
26683/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26684///
26685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_pd&expand=6098)
26686#[inline]
26687#[target_feature(enable = "avx512f,avx512vl")]
26688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26689#[cfg_attr(test, assert_instr(vunpcklpd))]
26690pub fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26691 unsafe {
26692 let unpacklo: f64x2 = _mm_unpacklo_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpacklo, f64x2::ZERO))
26694 }
26695}
26696
26697/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26698///
26699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps128_ps512&expand=621)
26700#[inline]
26701#[target_feature(enable = "avx512f")]
26702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26703pub fn _mm512_castps128_ps512(a: __m128) -> __m512 {
26704 unsafe {
26705 simd_shuffle!(
26706 a,
26707 _mm_undefined_ps(),
26708 [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
26709 )
26710 }
26711}
26712
26713/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26714///
26715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps256_ps512&expand=623)
26716#[inline]
26717#[target_feature(enable = "avx512f")]
26718#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26719pub fn _mm512_castps256_ps512(a: __m256) -> __m512 {
26720 unsafe {
26721 simd_shuffle!(
26722 a,
26723 _mm256_undefined_ps(),
26724 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
26725 )
26726 }
26727}
26728
26729/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26730///
26731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps128_ps512&expand=6196)
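///
/// A minimal illustrative sketch (added commentary, not from Intel's documentation;
/// assumes the `avx512f` target feature is available at the call site). Unlike
/// `_mm512_castps128_ps512`, the upper 384 bits of the result are guaranteed zero:
///
/// ```ignore
/// let a = _mm_set1_ps(3.0);
/// let r = unsafe { _mm512_zextps128_ps512(a) };
/// // elements 0..4 of `r` are 3.0, elements 4..16 are 0.0
/// ```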
26732#[inline]
26733#[target_feature(enable = "avx512f")]
26734#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26735pub fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
26736 unsafe {
26737 simd_shuffle!(
26738 a,
26739 _mm_set1_ps(0.),
26740 [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
26741 )
26742 }
26743}
26744
26745/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26746///
26747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps256_ps512&expand=6197)
26748#[inline]
26749#[target_feature(enable = "avx512f")]
26750#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26751pub fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
26752 unsafe {
26753 simd_shuffle!(
26754 a,
26755 _mm256_set1_ps(0.),
26756 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
26757 )
26758 }
26759}
26760
26761/// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26762///
26763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps128&expand=624)
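///
/// A minimal illustrative sketch (added commentary, not from Intel's documentation;
/// assumes the `avx512f` target feature is available at the call site):
///
/// ```ignore
/// let a = _mm512_set1_ps(7.0);
/// // only the lower 128 bits of `a` are kept
/// let lo: __m128 = unsafe { _mm512_castps512_ps128(a) };
/// ```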
26764#[inline]
26765#[target_feature(enable = "avx512f")]
26766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26767pub fn _mm512_castps512_ps128(a: __m512) -> __m128 {
26768 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26769}
26770
26771/// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26772///
26773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps256&expand=625)
26774#[inline]
26775#[target_feature(enable = "avx512f")]
26776#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26777pub fn _mm512_castps512_ps256(a: __m512) -> __m256 {
26778 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
26779}
26780
26781/// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26782///
26783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_pd&expand=616)
26784#[inline]
26785#[target_feature(enable = "avx512f")]
26786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26787pub fn _mm512_castps_pd(a: __m512) -> __m512d {
    unsafe { transmute(a) }
26789}
26790
26791/// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26792///
26793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_si512&expand=619)
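///
/// An illustrative sketch of the bit-level reinterpretation (assuming `std`,
/// runtime feature detection via `is_x86_feature_detected!`, and the other
/// `std::arch::x86_64` helpers used below):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let ones = _mm512_set1_ps(1.0);
///             let bits = _mm512_castps_si512(ones);
///             // 1.0f32 has the IEEE-754 bit pattern 0x3f80_0000.
///             assert_eq!(_mm512_cvtsi512_si32(bits), 0x3f80_0000);
///         }
///     }
/// }
/// ```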
26794#[inline]
26795#[target_feature(enable = "avx512f")]
26796#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26797pub fn _mm512_castps_si512(a: __m512) -> __m512i {
    unsafe { transmute(a) }
26799}
26800
26801/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26802///
26803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd128_pd512&expand=609)
26804#[inline]
26805#[target_feature(enable = "avx512f")]
26806#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26807pub fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
26808 unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26809}
26810
26811/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26812///
26813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd256_pd512&expand=611)
26814#[inline]
26815#[target_feature(enable = "avx512f")]
26816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26817pub fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
26818 unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26819}
26820
26821/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26822///
26823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd128_pd512&expand=6193)
26824#[inline]
26825#[target_feature(enable = "avx512f")]
26826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26827pub fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
26828 unsafe { simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) }
26829}
26830
26831/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26832///
26833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd256_pd512&expand=6194)
26834#[inline]
26835#[target_feature(enable = "avx512f")]
26836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26837pub fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
26838 unsafe { simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) }
26839}
26840
26841/// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26842///
26843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd128&expand=612)
26844#[inline]
26845#[target_feature(enable = "avx512f")]
26846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26847pub fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
26848 unsafe { simd_shuffle!(a, a, [0, 1]) }
26849}
26850
26851/// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26852///
26853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd256&expand=613)
26854#[inline]
26855#[target_feature(enable = "avx512f")]
26856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26857pub fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
26858 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26859}
26860
26861/// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26862///
26863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_ps&expand=604)
26864#[inline]
26865#[target_feature(enable = "avx512f")]
26866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26867pub fn _mm512_castpd_ps(a: __m512d) -> __m512 {
    unsafe { transmute(a) }
26869}
26870
26871/// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26872///
26873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_si512&expand=607)
26874#[inline]
26875#[target_feature(enable = "avx512f")]
26876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26877pub fn _mm512_castpd_si512(a: __m512d) -> __m512i {
    unsafe { transmute(a) }
26879}
26880
26881/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26882///
26883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi128_si512&expand=629)
26884#[inline]
26885#[target_feature(enable = "avx512f")]
26886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26887pub fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
26888 unsafe { simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26889}
26890
26891/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26892///
26893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi256_si512&expand=633)
26894#[inline]
26895#[target_feature(enable = "avx512f")]
26896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26897pub fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
26898 unsafe { simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26899}
26900
26901/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26902///
26903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi128_si512&expand=6199)
26904#[inline]
26905#[target_feature(enable = "avx512f")]
26906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26907pub fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
26908 unsafe { simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26909}
26910
26911/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26912///
26913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi256_si512&expand=6200)
26914#[inline]
26915#[target_feature(enable = "avx512f")]
26916#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26917pub fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
26918 unsafe { simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26919}
26920
26921/// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26922///
26923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si128&expand=636)
26924#[inline]
26925#[target_feature(enable = "avx512f")]
26926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26927pub fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
26928 unsafe { simd_shuffle!(a, a, [0, 1]) }
26929}
26930
26931/// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26932///
26933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si256&expand=637)
26934#[inline]
26935#[target_feature(enable = "avx512f")]
26936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26937pub fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
26938 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26939}
26940
26941/// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26942///
26943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_ps&expand=635)
26944#[inline]
26945#[target_feature(enable = "avx512f")]
26946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26947pub fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
    unsafe { transmute(a) }
26949}
26950
26951/// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26952///
26953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_pd&expand=634)
26954#[inline]
26955#[target_feature(enable = "avx512f")]
26956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26957pub fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
    unsafe { transmute(a) }
26959}
26960
26961/// Copy the lower 32-bit integer in a to dst.
26962///
26963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsi512_si32&expand=1882)
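///
/// A quick usage sketch (assuming `std`, runtime feature detection via
/// `is_x86_feature_detected!`, and the other `std::arch::x86_64` helpers used
/// below):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             // `_mm512_setr_epi32` lists elements starting from lane 0.
///             let v = _mm512_setr_epi32(42, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///             assert_eq!(_mm512_cvtsi512_si32(v), 42);
///         }
///     }
/// }
/// ```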
26964#[inline]
26965#[target_feature(enable = "avx512f")]
26966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26967#[cfg_attr(test, assert_instr(vmovd))]
26968pub fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
26969 unsafe { simd_extract!(a.as_i32x16(), 0) }
26970}
26971
26972/// Copy the lower single-precision (32-bit) floating-point element of a to dst.
26973///
26974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtss_f32)
26975#[inline]
26976#[target_feature(enable = "avx512f")]
26977#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26978pub fn _mm512_cvtss_f32(a: __m512) -> f32 {
26979 unsafe { simd_extract!(a, 0) }
26980}
26981
26982/// Copy the lower double-precision (64-bit) floating-point element of a to dst.
26983///
26984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsd_f64)
26985#[inline]
26986#[target_feature(enable = "avx512f")]
26987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26988pub fn _mm512_cvtsd_f64(a: __m512d) -> f64 {
26989 unsafe { simd_extract!(a, 0) }
26990}
26991
26992/// Broadcast the low packed 32-bit integer from a to all elements of dst.
26993///
26994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastd_epi32&expand=545)
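///
/// A quick usage sketch (assuming `std`, runtime feature detection via
/// `is_x86_feature_detected!`, and the other `std::arch::x86_64` helpers used
/// below):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let v = _mm512_broadcastd_epi32(_mm_setr_epi32(7, 1, 2, 3));
///             // Every one of the 16 lanes now holds 7, so all comparison mask bits are set.
///             let k = _mm512_cmpeq_epi32_mask(v, _mm512_set1_epi32(7));
///             assert_eq!(k, 0xffff);
///         }
///     }
/// }
/// ```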
26995#[inline]
26996#[target_feature(enable = "avx512f")]
26997#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26998#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
26999pub fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
27000 unsafe {
27001 let a: i32x16 = _mm512_castsi128_si512(a).as_i32x16();
27002 let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
        transmute(ret)
27004 }
27005}
27006
27007/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27008///
27009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastd_epi32&expand=546)
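///
/// A sketch of the writemask behaviour (assuming `std`, runtime feature
/// detection via `is_x86_feature_detected!`, and the other `std::arch::x86_64`
/// helpers used below):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let src = _mm512_set1_epi32(-1);
///             let a = _mm_setr_epi32(7, 0, 0, 0);
///             // The low 8 mask bits are set, so lanes 0..8 take the broadcast
///             // value 7 and lanes 8..16 keep their value from `src`.
///             let r = _mm512_mask_broadcastd_epi32(src, 0b0000_0000_1111_1111, a);
///             let k = _mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(7));
///             assert_eq!(k, 0b0000_0000_1111_1111);
///         }
///     }
/// }
/// ```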
27010#[inline]
27011#[target_feature(enable = "avx512f")]
27012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27013#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27014pub fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
27015 unsafe {
27016 let broadcast: i32x16 = _mm512_broadcastd_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
27018 }
27019}
27020
27021/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27022///
27023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastd_epi32&expand=547)
27024#[inline]
27025#[target_feature(enable = "avx512f")]
27026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27027#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27028pub fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
27029 unsafe {
27030 let broadcast: i32x16 = _mm512_broadcastd_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
27032 }
27033}
27034
27035/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27036///
27037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastd_epi32&expand=543)
27038#[inline]
27039#[target_feature(enable = "avx512f,avx512vl")]
27040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27041#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27042pub fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27043 unsafe {
27044 let broadcast: i32x8 = _mm256_broadcastd_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
27046 }
27047}
27048
27049/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27050///
27051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastd_epi32&expand=544)
27052#[inline]
27053#[target_feature(enable = "avx512f,avx512vl")]
27054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27055#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27056pub fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
27057 unsafe {
27058 let broadcast: i32x8 = _mm256_broadcastd_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
27060 }
27061}
27062
27063/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27064///
27065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastd_epi32&expand=540)
27066#[inline]
27067#[target_feature(enable = "avx512f,avx512vl")]
27068#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27069#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27070pub fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
27071 unsafe {
27072 let broadcast: i32x4 = _mm_broadcastd_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x4()))
27074 }
27075}
27076
27077/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27078///
27079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastd_epi32&expand=541)
27080#[inline]
27081#[target_feature(enable = "avx512f,avx512vl")]
27082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27083#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27084pub fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
27085 unsafe {
27086 let broadcast: i32x4 = _mm_broadcastd_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, broadcast, i32x4::ZERO))
27088 }
27089}
27090
27091/// Broadcast the low packed 64-bit integer from a to all elements of dst.
27092///
27093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastq_epi64&expand=560)
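///
/// A quick usage sketch (assuming `std`, runtime feature detection via
/// `is_x86_feature_detected!`, and the other `std::arch::x86_64` helpers used
/// below):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let v = _mm512_broadcastq_epi64(_mm_set1_epi64x(9));
///             // All 8 lanes hold 9, so the full 8-bit comparison mask is set.
///             assert_eq!(_mm512_cmpeq_epi64_mask(v, _mm512_set1_epi64(9)), 0xff);
///         }
///     }
/// }
/// ```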
27094#[inline]
27095#[target_feature(enable = "avx512f")]
27096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27097#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq
27098pub fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
27099 unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
27100}
27101
27102/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27103///
27104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastq_epi64&expand=561)
27105#[inline]
27106#[target_feature(enable = "avx512f")]
27107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27108#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27109pub fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
27110 unsafe {
27111 let broadcast: i64x8 = _mm512_broadcastq_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
27113 }
27114}
27115
27116/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27117///
27118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastq_epi64&expand=562)
27119#[inline]
27120#[target_feature(enable = "avx512f")]
27121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27122#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27123pub fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
27124 unsafe {
27125 let broadcast: i64x8 = _mm512_broadcastq_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
27127 }
27128}
27129
27130/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27131///
27132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastq_epi64&expand=558)
27133#[inline]
27134#[target_feature(enable = "avx512f,avx512vl")]
27135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27136#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27137pub fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27138 unsafe {
27139 let broadcast: i64x4 = _mm256_broadcastq_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x4()))
27141 }
27142}
27143
27144/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27145///
27146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastq_epi64&expand=559)
27147#[inline]
27148#[target_feature(enable = "avx512f,avx512vl")]
27149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27150#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27151pub fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
27152 unsafe {
27153 let broadcast: i64x4 = _mm256_broadcastq_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, broadcast, i64x4::ZERO))
27155 }
27156}
27157
27158/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27159///
27160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastq_epi64&expand=555)
27161#[inline]
27162#[target_feature(enable = "avx512f,avx512vl")]
27163#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27164#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27165pub fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
27166 unsafe {
27167 let broadcast: i64x2 = _mm_broadcastq_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x2()))
27169 }
27170}
27171
27172/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27173///
27174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastq_epi64&expand=556)
27175#[inline]
27176#[target_feature(enable = "avx512f,avx512vl")]
27177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27178#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27179pub fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
27180 unsafe {
27181 let broadcast: i64x2 = _mm_broadcastq_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, broadcast, i64x2::ZERO))
27183 }
27184}
27185
27186/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst.
27187///
27188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastss_ps&expand=578)
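///
/// A quick usage sketch (assuming `std`, runtime feature detection via
/// `is_x86_feature_detected!`, and the other `std::arch::x86_64` helpers used
/// below):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let v = _mm512_broadcastss_ps(_mm_set_ss(2.5));
///             // All 16 lanes hold 2.5, so the horizontal sum is 40.0.
///             assert_eq!(_mm512_reduce_add_ps(v), 40.0);
///         }
///     }
/// }
/// ```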
27189#[inline]
27190#[target_feature(enable = "avx512f")]
27191#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27192#[cfg_attr(test, assert_instr(vbroadcastss))]
27193pub fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
27194 unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) }
27195}
27196
27197/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27198///
27199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastss_ps&expand=579)
27200#[inline]
27201#[target_feature(enable = "avx512f")]
27202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27203#[cfg_attr(test, assert_instr(vbroadcastss))]
27204pub fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
27205 unsafe {
27206 let broadcast: f32x16 = _mm512_broadcastss_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
27208 }
27209}
27210
27211/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27212///
27213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastss_ps&expand=580)
27214#[inline]
27215#[target_feature(enable = "avx512f")]
27216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27217#[cfg_attr(test, assert_instr(vbroadcastss))]
27218pub fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
27219 unsafe {
27220 let broadcast: f32x16 = _mm512_broadcastss_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
27222 }
27223}
27224
27225/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27226///
27227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastss_ps&expand=576)
27228#[inline]
27229#[target_feature(enable = "avx512f,avx512vl")]
27230#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27231#[cfg_attr(test, assert_instr(vbroadcastss))]
27232pub fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 {
27233 unsafe {
27234 let broadcast: f32x8 = _mm256_broadcastss_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
27236 }
27237}
27238
27239/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27240///
27241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastss_ps&expand=577)
27242#[inline]
27243#[target_feature(enable = "avx512f,avx512vl")]
27244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27245#[cfg_attr(test, assert_instr(vbroadcastss))]
27246pub fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
27247 unsafe {
27248 let broadcast: f32x8 = _mm256_broadcastss_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
27250 }
27251}
27252
27253/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27254///
27255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastss_ps&expand=573)
27256#[inline]
27257#[target_feature(enable = "avx512f,avx512vl")]
27258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27259#[cfg_attr(test, assert_instr(vbroadcastss))]
27260pub fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
27261 unsafe {
27262 let broadcast: f32x4 = _mm_broadcastss_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x4()))
27264 }
27265}
27266
27267/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27268///
27269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastss_ps&expand=574)
27270#[inline]
27271#[target_feature(enable = "avx512f,avx512vl")]
27272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27273#[cfg_attr(test, assert_instr(vbroadcastss))]
27274pub fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
27275 unsafe {
27276 let broadcast: f32x4 = _mm_broadcastss_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, broadcast, f32x4::ZERO))
27278 }
27279}
27280
27281/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst.
27282///
27283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastsd_pd&expand=567)
27284#[inline]
27285#[target_feature(enable = "avx512f")]
27286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27287#[cfg_attr(test, assert_instr(vbroadcastsd))]
27288pub fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
27289 unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
27290}
27291
27292/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27293///
27294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastsd_pd&expand=568)
27295#[inline]
27296#[target_feature(enable = "avx512f")]
27297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27298#[cfg_attr(test, assert_instr(vbroadcastsd))]
27299pub fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
27300 unsafe {
27301 let broadcast: f64x8 = _mm512_broadcastsd_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
27303 }
27304}
27305
27306/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27307///
27308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastsd_pd&expand=569)
27309#[inline]
27310#[target_feature(enable = "avx512f")]
27311#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27312#[cfg_attr(test, assert_instr(vbroadcastsd))]
27313pub fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
27314 unsafe {
27315 let broadcast: f64x8 = _mm512_broadcastsd_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
27317 }
27318}
27319
27320/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27321///
27322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastsd_pd&expand=565)
27323#[inline]
27324#[target_feature(enable = "avx512f,avx512vl")]
27325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27326#[cfg_attr(test, assert_instr(vbroadcastsd))]
27327pub fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
27328 unsafe {
27329 let broadcast: f64x4 = _mm256_broadcastsd_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x4()))
27331 }
27332}
27333
27334/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27335///
27336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastsd_pd&expand=566)
27337#[inline]
27338#[target_feature(enable = "avx512f,avx512vl")]
27339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27340#[cfg_attr(test, assert_instr(vbroadcastsd))]
27341pub fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
27342 unsafe {
27343 let broadcast: f64x4 = _mm256_broadcastsd_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, broadcast, f64x4::ZERO))
27345 }
27346}
27347
27348/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
27349///
27350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x4&expand=510)
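///
/// A quick usage sketch (assuming `std`, runtime feature detection via
/// `is_x86_feature_detected!`, and the other `std::arch::x86_64` helpers used
/// below):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let v = _mm512_broadcast_i32x4(_mm_setr_epi32(0, 1, 2, 3));
///             // The 0, 1, 2, 3 pattern repeats in each of the four 128-bit lanes.
///             let expect = _mm512_setr_epi32(0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3);
///             assert_eq!(_mm512_cmpeq_epi32_mask(v, expect), 0xffff);
///         }
///     }
/// }
/// ```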
27351#[inline]
27352#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27354pub fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
27355 unsafe {
27356 let a: i32x4 = a.as_i32x4();
27357 let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
        transmute(ret)
27359 }
27360}
27361
27362/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27363///
27364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x4&expand=511)
27365#[inline]
27366#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27368pub fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
27369 unsafe {
27370 let broadcast: i32x16 = _mm512_broadcast_i32x4(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
27372 }
27373}
27374
27375/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27376///
27377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x4&expand=512)
27378#[inline]
27379#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27380#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27381pub fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
27382 unsafe {
27383 let broadcast: i32x16 = _mm512_broadcast_i32x4(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
27385 }
27386}
27387
27388/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
27389///
27390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x4&expand=507)
27391#[inline]
27392#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27393#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27394pub fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
27395 unsafe {
27396 let a: i32x4 = a.as_i32x4();
27397 let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
        transmute(ret)
27399 }
27400}
27401
27402/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27403///
27404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x4&expand=508)
27405#[inline]
27406#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27407#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27408pub fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27409 unsafe {
27410 let broadcast: i32x8 = _mm256_broadcast_i32x4(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
27412 }
27413}
27414
27415/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27416///
27417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x4&expand=509)
27418#[inline]
27419#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27421pub fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
27422 unsafe {
27423 let broadcast: i32x8 = _mm256_broadcast_i32x4(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
27425 }
27426}
27427
27428/// Broadcast the 4 packed 64-bit integers from a to all elements of dst.
27429///
27430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x4&expand=522)
27431#[inline]
27432#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27433#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27434pub fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
27435 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27436}
27437
27438/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27439///
27440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x4&expand=523)
27441#[inline]
27442#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27443#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27444pub fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
27445 unsafe {
27446 let broadcast: i64x8 = _mm512_broadcast_i64x4(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
27448 }
27449}
27450
27451/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27452///
27453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x4&expand=524)
27454#[inline]
27455#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27456#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27457pub fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
27458 unsafe {
27459 let broadcast: i64x8 = _mm512_broadcast_i64x4(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
27461 }
27462}
27463
27464/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
27465///
27466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x4&expand=483)
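///
/// A quick usage sketch (assuming `std`, runtime feature detection via
/// `is_x86_feature_detected!`, and the other `std::arch::x86_64` helpers used
/// below):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let v = _mm512_broadcast_f32x4(_mm_setr_ps(1.0, 2.0, 3.0, 4.0));
///             // Each 128-bit lane sums to 10.0 and there are four lanes.
///             assert_eq!(_mm512_reduce_add_ps(v), 40.0);
///         }
///     }
/// }
/// ```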
27467#[inline]
27468#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27469#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27470pub fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
27471 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) }
27472}
27473
27474/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27475///
27476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x4&expand=484)
27477#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27480pub fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
27481 unsafe {
27482 let broadcast: f32x16 = _mm512_broadcast_f32x4(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
27484 }
27485}
27486
27487/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27488///
27489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x4&expand=485)
27490#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27493pub fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
27494 unsafe {
27495 let broadcast: f32x16 = _mm512_broadcast_f32x4(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
27497 }
27498}
27499
27500/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
27501///
27502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x4&expand=480)
27503#[inline]
27504#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27506pub fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
27507 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27508}
27509
27510/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27511///
27512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x4&expand=481)
27513#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27516pub fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 {
27517 unsafe {
27518 let broadcast: f32x8 = _mm256_broadcast_f32x4(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
27520 }
27521}
27522
27523/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27524///
27525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x4&expand=482)
27526#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27528#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27529pub fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
27530 unsafe {
27531 let broadcast: f32x8 = _mm256_broadcast_f32x4(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
27533 }
27534}
27535
27536/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst.
27537///
27538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x4&expand=495)
27539#[inline]
27540#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27542pub fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
27543 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27544}
27545
27546/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27547///
27548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x4&expand=496)
27549#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27551#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27552pub fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
27553 unsafe {
27554 let broadcast: f64x8 = _mm512_broadcast_f64x4(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
27556 }
27557}
27558
27559/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27560///
27561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x4&expand=497)
27562#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27565pub fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
27566 unsafe {
27567 let broadcast: f64x8 = _mm512_broadcast_f64x4(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
27569 }
27570}
27571
27572/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27573///
27574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi32&expand=435)
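///
/// A sketch of the blend semantics (assuming `std`, runtime feature detection
/// via `is_x86_feature_detected!`, and the other `std::arch::x86_64` helpers
/// used below):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let a = _mm512_set1_epi32(1);
///             let b = _mm512_set1_epi32(2);
///             // Lanes whose mask bit is set come from `b`; the rest come from `a`.
///             let r = _mm512_mask_blend_epi32(0b1010_1010_1010_1010, a, b);
///             assert_eq!(_mm512_cmpeq_epi32_mask(r, b), 0b1010_1010_1010_1010);
///         }
///     }
/// }
/// ```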
27575#[inline]
27576#[target_feature(enable = "avx512f")]
27577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27578#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27579pub fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16())) }
27581}
27582
27583/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27584///
27585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi32&expand=434)
27586#[inline]
27587#[target_feature(enable = "avx512f,avx512vl")]
27588#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27589#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27590pub fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i32x8(), a.as_i32x8())) }
27592}
27593
27594/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27595///
27596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi32&expand=432)
27597#[inline]
27598#[target_feature(enable = "avx512f,avx512vl")]
27599#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27600#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27601pub fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i32x4(), a.as_i32x4())) }
27603}
27604
27605/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27606///
27607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi64&expand=438)
27608#[inline]
27609#[target_feature(enable = "avx512f")]
27610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27611#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27612pub fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8())) }
27614}
27615
27616/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27617///
27618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi64&expand=437)
27619#[inline]
27620#[target_feature(enable = "avx512f,avx512vl")]
27621#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27622#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27623pub fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i64x4(), a.as_i64x4())) }
27625}
27626
27627/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27628///
27629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi64&expand=436)
27630#[inline]
27631#[target_feature(enable = "avx512f,avx512vl")]
27632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27633#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27634pub fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i64x2(), a.as_i64x2())) }
27636}
27637
27638/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27639///
27640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_ps&expand=451)
27641#[inline]
27642#[target_feature(enable = "avx512f")]
27643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27644#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27645pub fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16())) }
27647}
27648
27649/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27650///
27651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_ps&expand=450)
27652#[inline]
27653#[target_feature(enable = "avx512f,avx512vl")]
27654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27655#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27656pub fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe { transmute(simd_select_bitmask(k, b.as_f32x8(), a.as_f32x8())) }
27658}
27659
27660/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27661///
27662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_ps&expand=448)
27663#[inline]
27664#[target_feature(enable = "avx512f,avx512vl")]
27665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27666#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27667pub fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(simd_select_bitmask(k, b.as_f32x4(), a.as_f32x4())) }
27669}
27670
27671/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27672///
27673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_pd&expand=446)
27674#[inline]
27675#[target_feature(enable = "avx512f")]
27676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27677#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27678pub fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8())) }
27680}
27681
27682/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27683///
27684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_pd&expand=445)
27685#[inline]
27686#[target_feature(enable = "avx512f,avx512vl")]
27687#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27688#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27689pub fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe { transmute(simd_select_bitmask(k, b.as_f64x4(), a.as_f64x4())) }
27691}
27692
27693/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27694///
27695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_pd&expand=443)
27696#[inline]
27697#[target_feature(enable = "avx512f,avx512vl")]
27698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27699#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27700pub fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(simd_select_bitmask(k, b.as_f64x2(), a.as_f64x2())) }
27702}
27703
27704/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst.
27705///
/// <div class="warning">Only the lowest <strong>4 bits</strong> of <code>IMM8</code> are used (shift by at most 60 bytes)!</div>
27707///
27708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi32&expand=245)
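///
/// A worked sketch of the shift (assuming `std`, runtime feature detection via
/// `is_x86_feature_detected!`, and the other `std::arch::x86_64` helpers used
/// below):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let a = _mm512_set1_epi32(100);
///             let b = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///             // Shifting the concatenation `a:b` right by one element drops b[0],
///             // giving b[1], b[2], ..., b[15] followed by a[0].
///             let r = _mm512_alignr_epi32::<1>(a, b);
///             let expect =
///                 _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 100);
///             assert_eq!(_mm512_cmpeq_epi32_mask(r, expect), 0xffff);
///         }
///     }
/// }
/// ```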
27709#[inline]
27710#[target_feature(enable = "avx512f")]
27711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27712#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27713#[rustc_legacy_const_generics(2)]
27714pub fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
27715 unsafe {
27716 static_assert_uimm_bits!(IMM8, 8);
27717 let a = a.as_i32x16();
27718 let b = b.as_i32x16();
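        // Only the low 4 bits of `IMM8` select the shift; the match below covers 0..=15.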
27719 let imm8: i32 = IMM8 % 16;
27720 let r: i32x16 = match imm8 {
27721 0 => simd_shuffle!(
27722 a,
27723 b,
27724 [
27725 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
27726 ],
27727 ),
27728 1 => simd_shuffle!(
27729 a,
27730 b,
27731 [
27732 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
27733 ],
27734 ),
27735 2 => simd_shuffle!(
27736 a,
27737 b,
27738 [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
27739 ),
27740 3 => simd_shuffle!(
27741 a,
27742 b,
27743 [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
27744 ),
27745 4 => simd_shuffle!(
27746 a,
27747 b,
27748 [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
27749 ),
27750 5 => simd_shuffle!(
27751 a,
27752 b,
27753 [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
27754 ),
27755 6 => simd_shuffle!(
27756 a,
27757 b,
27758 [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
27759 ),
27760 7 => simd_shuffle!(
27761 a,
27762 b,
27763 [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
27764 ),
27765 8 => simd_shuffle!(
27766 a,
27767 b,
27768 [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
27769 ),
27770 9 => simd_shuffle!(
27771 a,
27772 b,
27773 [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
27774 ),
27775 10 => simd_shuffle!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
27776 11 => simd_shuffle!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
27777 12 => simd_shuffle!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
27778 13 => simd_shuffle!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
27779 14 => simd_shuffle!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
27780 15 => simd_shuffle!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
27781 _ => unreachable_unchecked(),
27782 };
27783 transmute(r)
27784 }
27785}
27786
27787/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27788///
27789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi32&expand=246)
27790#[inline]
27791#[target_feature(enable = "avx512f")]
27792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27793#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27794#[rustc_legacy_const_generics(4)]
27795pub fn _mm512_mask_alignr_epi32<const IMM8: i32>(
27796 src: __m512i,
27797 k: __mmask16,
27798 a: __m512i,
27799 b: __m512i,
27800) -> __m512i {
27801 unsafe {
27802 static_assert_uimm_bits!(IMM8, 8);
27803 let r: __m512i = _mm512_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
27805 }
27806}
27807
/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27809///
27810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi32&expand=247)
27811#[inline]
27812#[target_feature(enable = "avx512f")]
27813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27814#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27815#[rustc_legacy_const_generics(3)]
27816pub fn _mm512_maskz_alignr_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
27817 unsafe {
27818 static_assert_uimm_bits!(IMM8, 8);
27819 let r: __m512i = _mm512_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
27821 }
27822}
27823
27824/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst.
27825///
/// <div class="warning">Only the lowest <strong>3 bits</strong> of the immediate are used (shift at maximum by 28 bytes)!</div>
27827///
27828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi32&expand=242)
27829#[inline]
27830#[target_feature(enable = "avx512f,avx512vl")]
27831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27832#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27833#[rustc_legacy_const_generics(2)]
27834pub fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
27835 unsafe {
27836 static_assert_uimm_bits!(IMM8, 8);
27837 let a: i32x8 = a.as_i32x8();
27838 let b: i32x8 = b.as_i32x8();
27839 let imm8: i32 = IMM8 % 8;
27840 let r: i32x8 = match imm8 {
27841 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
27842 1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
27843 2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
27844 3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
27845 4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
27846 5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
27847 6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
27848 7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
27849 _ => unreachable_unchecked(),
27850 };
        transmute(r)
27852 }
27853}
27854
27855/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27856///
27857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi32&expand=243)
27858#[inline]
27859#[target_feature(enable = "avx512f,avx512vl")]
27860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27861#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27862#[rustc_legacy_const_generics(4)]
27863pub fn _mm256_mask_alignr_epi32<const IMM8: i32>(
27864 src: __m256i,
27865 k: __mmask8,
27866 a: __m256i,
27867 b: __m256i,
27868) -> __m256i {
27869 unsafe {
27870 static_assert_uimm_bits!(IMM8, 8);
27871 let r: __m256i = _mm256_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
27873 }
27874}
27875
27876/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27877///
27878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi32&expand=244)
27879#[inline]
27880#[target_feature(enable = "avx512f,avx512vl")]
27881#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27882#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27883#[rustc_legacy_const_generics(3)]
27884pub fn _mm256_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27885 unsafe {
27886 static_assert_uimm_bits!(IMM8, 8);
27887 let r: __m256i = _mm256_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
27889 }
27890}
27891
27892/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst.
27893///
/// <div class="warning">Only the lowest <strong>2 bits</strong> of the immediate are used (shift at maximum by 12 bytes)!</div>
27895///
27896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi32&expand=239)
27897#[inline]
27898#[target_feature(enable = "avx512f,avx512vl")]
27899#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27900#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignd
27901#[rustc_legacy_const_generics(2)]
27902pub fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
27903 unsafe {
27904 static_assert_uimm_bits!(IMM8, 8);
27905 let a: i32x4 = a.as_i32x4();
27906 let b: i32x4 = b.as_i32x4();
27907 let imm8: i32 = IMM8 % 4;
27908 let r: i32x4 = match imm8 {
27909 0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
27910 1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
27911 2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
27912 3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
27913 _ => unreachable_unchecked(),
27914 };
        transmute(r)
27916 }
27917}
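
// Illustrative sketch (added for exposition; not part of upstream stdarch): only
// `IMM8 % 4` is meaningful for the 128-bit variant, so a shift count of 5 selects
// the same lanes as 1. The helper name is hypothetical and compiled only under
// `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn _mm_alignr_epi32_wraparound_sketch() {
    let a = _mm_setr_epi32(0, 1, 2, 3);
    let b = _mm_setr_epi32(4, 5, 6, 7);
    // IMM8 = 1 keeps [b1, b2, b3, a0]; IMM8 = 5 wraps around to the same result.
    let r1 = _mm_alignr_epi32::<1>(a, b);
    let r5 = _mm_alignr_epi32::<5>(a, b);
    assert_eq!(_mm_cmpeq_epi32_mask(r1, r5), 0x0f);
}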
27918
27919/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27920///
27921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi32&expand=240)
27922#[inline]
27923#[target_feature(enable = "avx512f,avx512vl")]
27924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27925#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27926#[rustc_legacy_const_generics(4)]
27927pub fn _mm_mask_alignr_epi32<const IMM8: i32>(
27928 src: __m128i,
27929 k: __mmask8,
27930 a: __m128i,
27931 b: __m128i,
27932) -> __m128i {
27933 unsafe {
27934 static_assert_uimm_bits!(IMM8, 8);
27935 let r: __m128i = _mm_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
27937 }
27938}
27939
27940/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27941///
27942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi32&expand=241)
27943#[inline]
27944#[target_feature(enable = "avx512f,avx512vl")]
27945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27946#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27947#[rustc_legacy_const_generics(3)]
27948pub fn _mm_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27949 unsafe {
27950 static_assert_uimm_bits!(IMM8, 8);
27951 let r: __m128i = _mm_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
27953 }
27954}
27955
27956/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst.
27957///
/// <div class="warning">Only the lowest <strong>3 bits</strong> of the immediate are used (shift at maximum by 56 bytes)!</div>
27959///
27960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi64&expand=254)
27961#[inline]
27962#[target_feature(enable = "avx512f")]
27963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27964#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27965#[rustc_legacy_const_generics(2)]
27966pub fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
27967 unsafe {
27968 static_assert_uimm_bits!(IMM8, 8);
27969 let imm8: i32 = IMM8 % 8;
27970 let r: i64x8 = match imm8 {
27971 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
27972 1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
27973 2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
27974 3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
27975 4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
27976 5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
27977 6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
27978 7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
27979 _ => unreachable_unchecked(),
27980 };
        transmute(r)
27982 }
27983}
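
// Illustrative sketch (added for exposition; not part of upstream stdarch): the
// 64-bit variant rotates eight 64-bit lanes instead of sixteen 32-bit lanes.
// Hypothetical helper, compiled only under `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _mm512_alignr_epi64_usage_sketch() {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm512_setr_epi64(8, 9, 10, 11, 12, 13, 14, 15);
    // Shifting the concatenation a:b right by one 64-bit lane keeps [b1, ..., b7, a0].
    let r = _mm512_alignr_epi64::<1>(a, b);
    let e = _mm512_setr_epi64(9, 10, 11, 12, 13, 14, 15, 0);
    assert_eq!(_mm512_cmpeq_epi64_mask(r, e), 0xff);
}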
27984
27985/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27986///
27987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi64&expand=255)
27988#[inline]
27989#[target_feature(enable = "avx512f")]
27990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27991#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27992#[rustc_legacy_const_generics(4)]
27993pub fn _mm512_mask_alignr_epi64<const IMM8: i32>(
27994 src: __m512i,
27995 k: __mmask8,
27996 a: __m512i,
27997 b: __m512i,
27998) -> __m512i {
27999 unsafe {
28000 static_assert_uimm_bits!(IMM8, 8);
28001 let r: __m512i = _mm512_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
28003 }
28004}
28005
/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28007///
28008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi64&expand=256)
28009#[inline]
28010#[target_feature(enable = "avx512f")]
28011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28012#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28013#[rustc_legacy_const_generics(3)]
28014pub fn _mm512_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28015 unsafe {
28016 static_assert_uimm_bits!(IMM8, 8);
28017 let r: __m512i = _mm512_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
28019 }
28020}
28021
28022/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst.
28023///
/// <div class="warning">Only the lowest <strong>2 bits</strong> of the immediate are used (shift at maximum by 24 bytes)!</div>
28025///
28026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi64&expand=251)
28027#[inline]
28028#[target_feature(enable = "avx512f,avx512vl")]
28029#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28030#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28031#[rustc_legacy_const_generics(2)]
28032pub fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
28033 unsafe {
28034 static_assert_uimm_bits!(IMM8, 8);
28035 let imm8: i32 = IMM8 % 4;
28036 let r: i64x4 = match imm8 {
28037 0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
28038 1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
28039 2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
28040 3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
28041 _ => unreachable_unchecked(),
28042 };
        transmute(r)
28044 }
28045}
28046
28047/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28048///
28049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi64&expand=252)
28050#[inline]
28051#[target_feature(enable = "avx512f,avx512vl")]
28052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28053#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28054#[rustc_legacy_const_generics(4)]
28055pub fn _mm256_mask_alignr_epi64<const IMM8: i32>(
28056 src: __m256i,
28057 k: __mmask8,
28058 a: __m256i,
28059 b: __m256i,
28060) -> __m256i {
28061 unsafe {
28062 static_assert_uimm_bits!(IMM8, 8);
28063 let r: __m256i = _mm256_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
28065 }
28066}
28067
28068/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28069///
28070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi64&expand=253)
28071#[inline]
28072#[target_feature(enable = "avx512f,avx512vl")]
28073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28074#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28075#[rustc_legacy_const_generics(3)]
28076pub fn _mm256_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28077 unsafe {
28078 static_assert_uimm_bits!(IMM8, 8);
28079 let r: __m256i = _mm256_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
28081 }
28082}
28083
28084/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst.
28085///
/// <div class="warning">Only the lowest <strong>bit</strong> of the immediate is used (shift at maximum by 8 bytes)!</div>
28087///
28088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi64&expand=248)
28089#[inline]
28090#[target_feature(enable = "avx512f,avx512vl")]
28091#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28092#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignq
28093#[rustc_legacy_const_generics(2)]
28094pub fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
28095 unsafe {
28096 static_assert_uimm_bits!(IMM8, 8);
28097 let imm8: i32 = IMM8 % 2;
28098 let r: i64x2 = match imm8 {
28099 0 => simd_shuffle!(a, b, [2, 3]),
28100 1 => simd_shuffle!(a, b, [3, 0]),
28101 _ => unreachable_unchecked(),
28102 };
        transmute(r)
28104 }
28105}
28106
28107/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28108///
28109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi64&expand=249)
28110#[inline]
28111#[target_feature(enable = "avx512f,avx512vl")]
28112#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28113#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28114#[rustc_legacy_const_generics(4)]
28115pub fn _mm_mask_alignr_epi64<const IMM8: i32>(
28116 src: __m128i,
28117 k: __mmask8,
28118 a: __m128i,
28119 b: __m128i,
28120) -> __m128i {
28121 unsafe {
28122 static_assert_uimm_bits!(IMM8, 8);
28123 let r: __m128i = _mm_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x2(), src.as_i64x2()))
28125 }
28126}
28127
28128/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28129///
28130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi64&expand=250)
28131#[inline]
28132#[target_feature(enable = "avx512f,avx512vl")]
28133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28134#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28135#[rustc_legacy_const_generics(3)]
28136pub fn _mm_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28137 unsafe {
28138 static_assert_uimm_bits!(IMM8, 8);
28139 let r: __m128i = _mm_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x2(), i64x2::ZERO))
28141 }
28142}
28143
28144/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
28145///
28146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi32&expand=272)
28147#[inline]
28148#[target_feature(enable = "avx512f")]
28149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28150#[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but generate vpandq
28151pub fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
28153}
28154
28155/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28156///
28157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi32&expand=273)
28158#[inline]
28159#[target_feature(enable = "avx512f")]
28160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28161#[cfg_attr(test, assert_instr(vpandd))]
28162pub fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28163 unsafe {
28164 let and: i32x16 = _mm512_and_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, and, src.as_i32x16()))
28166 }
28167}
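
// Illustrative sketch (added for exposition; not part of upstream stdarch):
// demonstrates writemask semantics: lanes whose mask bit is clear keep the value
// from `src` instead of receiving the AND result. Hypothetical helper, compiled
// only under `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _mm512_mask_and_epi32_writemask_sketch() {
    let src = _mm512_set1_epi32(-1);
    let a = _mm512_set1_epi32(0b1100);
    let b = _mm512_set1_epi32(0b1010);
    // Only lane 0 is selected; every other lane is copied from `src`.
    let r = _mm512_mask_and_epi32(src, 0b0000_0000_0000_0001, a, b);
    let e = _mm512_setr_epi32(
        0b1000, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    );
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xffff);
}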
28168
28169/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28170///
28171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi32&expand=274)
28172#[inline]
28173#[target_feature(enable = "avx512f")]
28174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28175#[cfg_attr(test, assert_instr(vpandd))]
28176pub fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28177 unsafe {
28178 let and: i32x16 = _mm512_and_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, and, i32x16::ZERO))
28180 }
28181}
28182
28183/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28184///
28185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi32&expand=270)
28186#[inline]
28187#[target_feature(enable = "avx512f,avx512vl")]
28188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28189#[cfg_attr(test, assert_instr(vpandd))]
28190pub fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28191 unsafe {
        let and: i32x8 = simd_and(a.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, and, src.as_i32x8()))
28194 }
28195}
28196
28197/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28198///
28199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi32&expand=271)
28200#[inline]
28201#[target_feature(enable = "avx512f,avx512vl")]
28202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28203#[cfg_attr(test, assert_instr(vpandd))]
28204pub fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28205 unsafe {
        let and: i32x8 = simd_and(a.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, and, i32x8::ZERO))
28208 }
28209}
28210
28211/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28212///
28213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi32&expand=268)
28214#[inline]
28215#[target_feature(enable = "avx512f,avx512vl")]
28216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28217#[cfg_attr(test, assert_instr(vpandd))]
28218pub fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28219 unsafe {
        let and: i32x4 = simd_and(a.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, and, src.as_i32x4()))
28222 }
28223}
28224
28225/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28226///
28227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi32&expand=269)
28228#[inline]
28229#[target_feature(enable = "avx512f,avx512vl")]
28230#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28231#[cfg_attr(test, assert_instr(vpandd))]
28232pub fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28233 unsafe {
        let and: i32x4 = simd_and(a.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, and, i32x4::ZERO))
28236 }
28237}
28238
28239/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
28240///
28241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi64&expand=279)
28242#[inline]
28243#[target_feature(enable = "avx512f")]
28244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28245#[cfg_attr(test, assert_instr(vpandq))]
28246pub fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_and(a.as_i64x8(), b.as_i64x8())) }
28248}
28249
28250/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28251///
28252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi64&expand=280)
28253#[inline]
28254#[target_feature(enable = "avx512f")]
28255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28256#[cfg_attr(test, assert_instr(vpandq))]
28257pub fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28258 unsafe {
28259 let and: i64x8 = _mm512_and_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, and, src.as_i64x8()))
28261 }
28262}
28263
28264/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28265///
28266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi64&expand=281)
28267#[inline]
28268#[target_feature(enable = "avx512f")]
28269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28270#[cfg_attr(test, assert_instr(vpandq))]
28271pub fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28272 unsafe {
28273 let and: i64x8 = _mm512_and_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, and, i64x8::ZERO))
28275 }
28276}
28277
28278/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28279///
28280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi64&expand=277)
28281#[inline]
28282#[target_feature(enable = "avx512f,avx512vl")]
28283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28284#[cfg_attr(test, assert_instr(vpandq))]
28285pub fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28286 unsafe {
        let and: i64x4 = simd_and(a.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, and, src.as_i64x4()))
28289 }
28290}
28291
28292/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28293///
28294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi64&expand=278)
28295#[inline]
28296#[target_feature(enable = "avx512f,avx512vl")]
28297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28298#[cfg_attr(test, assert_instr(vpandq))]
28299pub fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28300 unsafe {
        let and: i64x4 = simd_and(a.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, and, i64x4::ZERO))
28303 }
28304}
28305
28306/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28307///
28308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi64&expand=275)
28309#[inline]
28310#[target_feature(enable = "avx512f,avx512vl")]
28311#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28312#[cfg_attr(test, assert_instr(vpandq))]
28313pub fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28314 unsafe {
        let and: i64x2 = simd_and(a.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, and, src.as_i64x2()))
28317 }
28318}
28319
28320/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28321///
28322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi64&expand=276)
28323#[inline]
28324#[target_feature(enable = "avx512f,avx512vl")]
28325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28326#[cfg_attr(test, assert_instr(vpandq))]
28327pub fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28328 unsafe {
        let and: i64x2 = simd_and(a.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, and, i64x2::ZERO))
28331 }
28332}
28333
28334/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
28335///
28336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_si512&expand=302)
28337#[inline]
28338#[target_feature(enable = "avx512f")]
28339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28340#[cfg_attr(test, assert_instr(vpandq))]
28341pub fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
28343}
28344
28345/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28346///
28347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi32&expand=4042)
28348#[inline]
28349#[target_feature(enable = "avx512f")]
28350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28351#[cfg_attr(test, assert_instr(vporq))]
28352pub fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
28354}
28355
28356/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28357///
28358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi32&expand=4040)
28359#[inline]
28360#[target_feature(enable = "avx512f")]
28361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28362#[cfg_attr(test, assert_instr(vpord))]
28363pub fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28364 unsafe {
28365 let or: i32x16 = _mm512_or_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, or, src.as_i32x16()))
28367 }
28368}
28369
28370/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28371///
28372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi32&expand=4041)
28373#[inline]
28374#[target_feature(enable = "avx512f")]
28375#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28376#[cfg_attr(test, assert_instr(vpord))]
28377pub fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28378 unsafe {
28379 let or: i32x16 = _mm512_or_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, or, i32x16::ZERO))
28381 }
28382}
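
// Illustrative sketch (added for exposition; not part of upstream stdarch):
// demonstrates zeromask semantics: lanes whose mask bit is clear are forced to
// zero rather than keeping any previous value. Hypothetical helper, compiled only
// under `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _mm512_maskz_or_epi32_zeromask_sketch() {
    let a = _mm512_set1_epi32(0b0101);
    let b = _mm512_set1_epi32(0b0011);
    // The low eight lanes receive a | b = 0b0111; the high eight lanes are zeroed.
    let r = _mm512_maskz_or_epi32(0x00ff, a, b);
    let e = _mm512_setr_epi32(7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xffff);
}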
28383
28384/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28385///
28386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi32&expand=4039)
28387#[inline]
28388#[target_feature(enable = "avx512f,avx512vl")]
28389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28390#[cfg_attr(test, assert_instr(vor))] //should be vpord
28391pub fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
28393}
28394
28395/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28396///
28397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi32&expand=4037)
28398#[inline]
28399#[target_feature(enable = "avx512f,avx512vl")]
28400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28401#[cfg_attr(test, assert_instr(vpord))]
28402pub fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28403 unsafe {
28404 let or: i32x8 = _mm256_or_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, or, src.as_i32x8()))
28406 }
28407}
28408
28409/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28410///
28411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi32&expand=4038)
28412#[inline]
28413#[target_feature(enable = "avx512f,avx512vl")]
28414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28415#[cfg_attr(test, assert_instr(vpord))]
28416pub fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28417 unsafe {
28418 let or: i32x8 = _mm256_or_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, or, i32x8::ZERO))
28420 }
28421}
28422
28423/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28424///
28425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi32&expand=4036)
28426#[inline]
28427#[target_feature(enable = "avx512f,avx512vl")]
28428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28429#[cfg_attr(test, assert_instr(vor))] //should be vpord
28430pub fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_or(a.as_i32x4(), b.as_i32x4())) }
28432}
28433
28434/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28435///
28436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi32&expand=4034)
28437#[inline]
28438#[target_feature(enable = "avx512f,avx512vl")]
28439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28440#[cfg_attr(test, assert_instr(vpord))]
28441pub fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28442 unsafe {
28443 let or: i32x4 = _mm_or_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, or, src.as_i32x4()))
28445 }
28446}
28447
28448/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28449///
28450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi32&expand=4035)
28451#[inline]
28452#[target_feature(enable = "avx512f,avx512vl")]
28453#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28454#[cfg_attr(test, assert_instr(vpord))]
28455pub fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28456 unsafe {
28457 let or: i32x4 = _mm_or_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, or, i32x4::ZERO))
28459 }
28460}
28461
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
28463///
28464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi64&expand=4051)
28465#[inline]
28466#[target_feature(enable = "avx512f")]
28467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28468#[cfg_attr(test, assert_instr(vporq))]
28469pub fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_or(a.as_i64x8(), b.as_i64x8())) }
28471}
28472
28473/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28474///
28475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi64&expand=4049)
28476#[inline]
28477#[target_feature(enable = "avx512f")]
28478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28479#[cfg_attr(test, assert_instr(vporq))]
28480pub fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28481 unsafe {
28482 let or: i64x8 = _mm512_or_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, or, src.as_i64x8()))
28484 }
28485}
28486
28487/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28488///
28489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi64&expand=4050)
28490#[inline]
28491#[target_feature(enable = "avx512f")]
28492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28493#[cfg_attr(test, assert_instr(vporq))]
28494pub fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28495 unsafe {
28496 let or: i64x8 = _mm512_or_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, or, i64x8::ZERO))
28498 }
28499}
28500
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
28502///
28503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi64&expand=4048)
28504#[inline]
28505#[target_feature(enable = "avx512f,avx512vl")]
28506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28507#[cfg_attr(test, assert_instr(vor))] //should be vporq
28508pub fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_or(a.as_i64x4(), b.as_i64x4())) }
28510}
28511
28512/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28513///
28514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi64&expand=4046)
28515#[inline]
28516#[target_feature(enable = "avx512f,avx512vl")]
28517#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28518#[cfg_attr(test, assert_instr(vporq))]
28519pub fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28520 unsafe {
28521 let or: i64x4 = _mm256_or_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, or, src.as_i64x4()))
28523 }
28524}
28525
28526/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28527///
28528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi64&expand=4047)
28529#[inline]
28530#[target_feature(enable = "avx512f,avx512vl")]
28531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28532#[cfg_attr(test, assert_instr(vporq))]
28533pub fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28534 unsafe {
28535 let or: i64x4 = _mm256_or_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, or, i64x4::ZERO))
28537 }
28538}
28539
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
28541///
28542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi64&expand=4045)
28543#[inline]
28544#[target_feature(enable = "avx512f,avx512vl")]
28545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28546#[cfg_attr(test, assert_instr(vor))] //should be vporq
28547pub fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_or(a.as_i64x2(), b.as_i64x2())) }
28549}
28550
28551/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28552///
28553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi64&expand=4043)
28554#[inline]
28555#[target_feature(enable = "avx512f,avx512vl")]
28556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28557#[cfg_attr(test, assert_instr(vporq))]
28558pub fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28559 unsafe {
28560 let or: i64x2 = _mm_or_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, or, src.as_i64x2()))
28562 }
28563}
28564
28565/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28566///
28567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi64&expand=4044)
28568#[inline]
28569#[target_feature(enable = "avx512f,avx512vl")]
28570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28571#[cfg_attr(test, assert_instr(vporq))]
28572pub fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28573 unsafe {
28574 let or: i64x2 = _mm_or_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, or, i64x2::ZERO))
28576 }
28577}
28578
28579/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
28580///
28581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_si512&expand=4072)
28582#[inline]
28583#[target_feature(enable = "avx512f")]
28584#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28585#[cfg_attr(test, assert_instr(vporq))]
28586pub fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
28588}
28589
28590/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28591///
28592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi32&expand=6142)
28593#[inline]
28594#[target_feature(enable = "avx512f")]
28595#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28596#[cfg_attr(test, assert_instr(vpxorq))] //should be vpxord
28597pub fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
28599}
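
// Illustrative sketch (added for exposition; not part of upstream stdarch): XOR
// with an all-ones vector flips every bit, which is also how the `andnot`
// intrinsics later in this file synthesize a bitwise NOT. Hypothetical helper,
// compiled only under `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _mm512_xor_epi32_bitflip_sketch() {
    let zeros = _mm512_set1_epi32(0);
    let ones = _mm512_set1_epi32(-1);
    // 0 ^ 0xFFFF_FFFF == 0xFFFF_FFFF in every lane.
    let r = _mm512_xor_epi32(zeros, ones);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, ones), 0xffff);
}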
28600
28601/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28602///
28603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi32&expand=6140)
28604#[inline]
28605#[target_feature(enable = "avx512f")]
28606#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28607#[cfg_attr(test, assert_instr(vpxord))]
28608pub fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28609 unsafe {
28610 let xor: i32x16 = _mm512_xor_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
28612 }
28613}
28614
28615/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28616///
28617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi32&expand=6141)
28618#[inline]
28619#[target_feature(enable = "avx512f")]
28620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28621#[cfg_attr(test, assert_instr(vpxord))]
28622pub fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28623 unsafe {
28624 let xor: i32x16 = _mm512_xor_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, xor, i32x16::ZERO))
28626 }
28627}
28628
28629/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28630///
28631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi32&expand=6139)
28632#[inline]
28633#[target_feature(enable = "avx512f,avx512vl")]
28634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28635#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
28636pub fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_xor(a.as_i32x8(), b.as_i32x8())) }
28638}
28639
28640/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28641///
28642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi32&expand=6137)
28643#[inline]
28644#[target_feature(enable = "avx512f,avx512vl")]
28645#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28646#[cfg_attr(test, assert_instr(vpxord))]
28647pub fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28648 unsafe {
28649 let xor: i32x8 = _mm256_xor_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, xor, src.as_i32x8()))
28651 }
28652}
28653
28654/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28655///
28656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi32&expand=6138)
28657#[inline]
28658#[target_feature(enable = "avx512f,avx512vl")]
28659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28660#[cfg_attr(test, assert_instr(vpxord))]
28661pub fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28662 unsafe {
28663 let xor: i32x8 = _mm256_xor_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, xor, i32x8::ZERO))
28665 }
28666}
28667
28668/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28669///
28670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi32&expand=6136)
28671#[inline]
28672#[target_feature(enable = "avx512f,avx512vl")]
28673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28674#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
28675pub fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_xor(a.as_i32x4(), b.as_i32x4())) }
28677}
28678
28679/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28680///
28681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi32&expand=6134)
28682#[inline]
28683#[target_feature(enable = "avx512f,avx512vl")]
28684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28685#[cfg_attr(test, assert_instr(vpxord))]
28686pub fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28687 unsafe {
28688 let xor: i32x4 = _mm_xor_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, xor, src.as_i32x4()))
28690 }
28691}
28692
28693/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28694///
28695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi32&expand=6135)
28696#[inline]
28697#[target_feature(enable = "avx512f,avx512vl")]
28698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28699#[cfg_attr(test, assert_instr(vpxord))]
28700pub fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28701 unsafe {
28702 let xor: i32x4 = _mm_xor_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, xor, i32x4::ZERO))
28704 }
28705}
28706
28707/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28708///
28709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi64&expand=6151)
28710#[inline]
28711#[target_feature(enable = "avx512f")]
28712#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28713#[cfg_attr(test, assert_instr(vpxorq))]
28714pub fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) }
28716}
28717
28718/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28719///
28720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi64&expand=6149)
28721#[inline]
28722#[target_feature(enable = "avx512f")]
28723#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28724#[cfg_attr(test, assert_instr(vpxorq))]
28725pub fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28726 unsafe {
28727 let xor: i64x8 = _mm512_xor_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
28729 }
28730}
28731
28732/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28733///
28734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi64&expand=6150)
28735#[inline]
28736#[target_feature(enable = "avx512f")]
28737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28738#[cfg_attr(test, assert_instr(vpxorq))]
28739pub fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28740 unsafe {
28741 let xor: i64x8 = _mm512_xor_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, xor, i64x8::ZERO))
28743 }
28744}
28745
28746/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28747///
28748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi64&expand=6148)
28749#[inline]
28750#[target_feature(enable = "avx512f,avx512vl")]
28751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28752#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
28753pub fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
28755}
28756
28757/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28758///
28759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi64&expand=6146)
28760#[inline]
28761#[target_feature(enable = "avx512f,avx512vl")]
28762#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28763#[cfg_attr(test, assert_instr(vpxorq))]
28764pub fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28765 unsafe {
28766 let xor: i64x4 = _mm256_xor_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, xor, src.as_i64x4()))
28768 }
28769}
28770
28771/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28772///
28773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi64&expand=6147)
28774#[inline]
28775#[target_feature(enable = "avx512f,avx512vl")]
28776#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28777#[cfg_attr(test, assert_instr(vpxorq))]
28778pub fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28779 unsafe {
28780 let xor: i64x4 = _mm256_xor_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, xor, i64x4::ZERO))
28782 }
28783}
28784
28785/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28786///
28787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi64&expand=6145)
28788#[inline]
28789#[target_feature(enable = "avx512f,avx512vl")]
28790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28791#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
28792pub fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_xor(a.as_i64x2(), b.as_i64x2())) }
28794}
28795
28796/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28797///
28798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi64&expand=6143)
28799#[inline]
28800#[target_feature(enable = "avx512f,avx512vl")]
28801#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28802#[cfg_attr(test, assert_instr(vpxorq))]
28803pub fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28804 unsafe {
28805 let xor: i64x2 = _mm_xor_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, xor, src.as_i64x2()))
28807 }
28808}
28809
28810/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28811///
28812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi64&expand=6144)
28813#[inline]
28814#[target_feature(enable = "avx512f,avx512vl")]
28815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28816#[cfg_attr(test, assert_instr(vpxorq))]
28817pub fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28818 unsafe {
28819 let xor: i64x2 = _mm_xor_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, xor, i64x2::ZERO))
28821 }
28822}
28823
28824/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
28825///
28826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_si512&expand=6172)
28827#[inline]
28828#[target_feature(enable = "avx512f")]
28829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28830#[cfg_attr(test, assert_instr(vpxorq))]
28831pub fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
28833}
28834
28835/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst.
28836///
28837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi32&expand=310)
28838#[inline]
28839#[target_feature(enable = "avx512f")]
28840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28841#[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd
28842pub fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b)
28844}
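
// Illustrative sketch (added for exposition; not part of upstream stdarch):
// `andnot` computes `(!a) & b` per lane, so any bit set in `a` is always cleared
// in the result. Hypothetical helper, compiled only under `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _mm512_andnot_epi32_usage_sketch() {
    let a = _mm512_set1_epi32(0b1100);
    let b = _mm512_set1_epi32(0b1010);
    // (!0b1100) & 0b1010 == 0b0010 in every lane.
    let r = _mm512_andnot_epi32(a, b);
    let e = _mm512_set1_epi32(0b0010);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xffff);
}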
28845
28846/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28847///
28848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi32&expand=311)
28849#[inline]
28850#[target_feature(enable = "avx512f")]
28851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28852#[cfg_attr(test, assert_instr(vpandnd))]
28853pub fn _mm512_mask_andnot_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28854 unsafe {
28855 let andnot: i32x16 = _mm512_andnot_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, andnot, src.as_i32x16()))
28857 }
28858}
28859
28860/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28861///
28862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi32&expand=312)
28863#[inline]
28864#[target_feature(enable = "avx512f")]
28865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28866#[cfg_attr(test, assert_instr(vpandnd))]
28867pub fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28868 unsafe {
28869 let andnot: i32x16 = _mm512_andnot_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, andnot, i32x16::ZERO))
28871 }
28872}
28873
28874/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28875///
28876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi32&expand=308)
28877#[inline]
28878#[target_feature(enable = "avx512f,avx512vl")]
28879#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28880#[cfg_attr(test, assert_instr(vpandnd))]
28881pub fn _mm256_mask_andnot_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28882 unsafe {
        let not: __m256i = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
        let andnot: i32x8 = simd_and(not.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, andnot, src.as_i32x8()))
28886 }
28887}
28888
28889/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28890///
28891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi32&expand=309)
28892#[inline]
28893#[target_feature(enable = "avx512f,avx512vl")]
28894#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28895#[cfg_attr(test, assert_instr(vpandnd))]
28896pub fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28897 unsafe {
        let not: __m256i = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
        let andnot: i32x8 = simd_and(not.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, andnot, i32x8::ZERO))
28901 }
28902}
28903
28904/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28905///
28906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi32&expand=306)
28907#[inline]
28908#[target_feature(enable = "avx512f,avx512vl")]
28909#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28910#[cfg_attr(test, assert_instr(vpandnd))]
28911pub fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28912 unsafe {
        let not: __m128i = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
        let andnot: i32x4 = simd_and(not.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, andnot, src.as_i32x4()))
28916 }
28917}
28918
28919/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28920///
28921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi32&expand=307)
28922#[inline]
28923#[target_feature(enable = "avx512f,avx512vl")]
28924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28925#[cfg_attr(test, assert_instr(vpandnd))]
28926pub fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28927 unsafe {
        let not: __m128i = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
        let andnot: i32x4 = simd_and(not.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, andnot, i32x4::ZERO))
28931 }
28932}
28933
28934/// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst.
28935///
28936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi64&expand=317)
28937#[inline]
28938#[target_feature(enable = "avx512f")]
28939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
28941pub fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
28943}
28944
28945/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28946///
28947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi64&expand=318)
28948#[inline]
28949#[target_feature(enable = "avx512f")]
28950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28951#[cfg_attr(test, assert_instr(vpandnq))]
28952pub fn _mm512_mask_andnot_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28953 unsafe {
28954 let andnot: i64x8 = _mm512_andnot_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
28956 }
28957}
28958
28959/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28960///
28961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi64&expand=319)
28962#[inline]
28963#[target_feature(enable = "avx512f")]
28964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28965#[cfg_attr(test, assert_instr(vpandnq))]
28966pub fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28967 unsafe {
28968 let andnot: i64x8 = _mm512_andnot_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, andnot, i64x8::ZERO))
28970 }
28971}
28972
28973/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28974///
28975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi64&expand=315)
28976#[inline]
28977#[target_feature(enable = "avx512f,avx512vl")]
28978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28979#[cfg_attr(test, assert_instr(vpandnq))]
28980pub fn _mm256_mask_andnot_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28981 unsafe {
        let not: __m256i = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
        let andnot: i64x4 = simd_and(not.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, andnot, src.as_i64x4()))
28985 }
28986}
28987
28988/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28989///
28990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi64&expand=316)
28991#[inline]
28992#[target_feature(enable = "avx512f,avx512vl")]
28993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28994#[cfg_attr(test, assert_instr(vpandnq))]
28995pub fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28996 unsafe {
        let not: __m256i = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
        let andnot: i64x4 = simd_and(not.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, andnot, i64x4::ZERO))
29000 }
29001}
29002
29003/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29004///
29005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi64&expand=313)
29006#[inline]
29007#[target_feature(enable = "avx512f,avx512vl")]
29008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29009#[cfg_attr(test, assert_instr(vpandnq))]
29010pub fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29011 unsafe {
        let not: __m128i = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
        let andnot: i64x2 = simd_and(not.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, andnot, src.as_i64x2()))
29015 }
29016}
29017
29018/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29019///
29020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi64&expand=314)
29021#[inline]
29022#[target_feature(enable = "avx512f,avx512vl")]
29023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29024#[cfg_attr(test, assert_instr(vpandnq))]
29025pub fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29026 unsafe {
        let not: __m128i = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
        let andnot: i64x2 = simd_and(not.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, andnot, i64x2::ZERO))
29030 }
29031}
29032
29033/// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst.
29034///
29035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_si512&expand=340)
29036#[inline]
29037#[target_feature(enable = "avx512f")]
29038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29039#[cfg_attr(test, assert_instr(vpandnq))]
29040pub fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
29042}
29043
29044/// Convert 16-bit mask a into an integer value, and store the result in dst.
29045///
29046/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask16_u32)
29047#[inline]
29048#[target_feature(enable = "avx512f")]
29049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29050pub fn _cvtmask16_u32(a: __mmask16) -> u32 {
29051 a as u32
29052}
29053
/// Convert 32-bit integer value a to a 16-bit mask and store the result in dst.
29055///
29056/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask16)
29057#[inline]
29058#[target_feature(enable = "avx512f")]
29059#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29060pub fn _cvtu32_mask16(a: u32) -> __mmask16 {
29061 a as __mmask16
29062}
29063
29064/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
29065///
29066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212)
29067#[inline]
29068#[target_feature(enable = "avx512f")]
29069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29070#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
29071pub fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29072 a & b
29073}
29074
29075/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
29076///
29077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kand&expand=3210)
29078#[inline]
29079#[target_feature(enable = "avx512f")]
29080#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29081#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
29082pub fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
29083 a & b
29084}
29085
29086/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
29087///
29088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kor_mask16&expand=3239)
29089#[inline]
29090#[target_feature(enable = "avx512f")]
29091#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29092#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
29093pub fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29094 a | b
29095}
29096
29097/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
29098///
29099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kor&expand=3237)
29100#[inline]
29101#[target_feature(enable = "avx512f")]
29102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29103#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
29104pub fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
29105 a | b
29106}
29107
29108/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
29109///
29110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxor_mask16&expand=3291)
29111#[inline]
29112#[target_feature(enable = "avx512f")]
29113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29114#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
29115pub fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29116 a ^ b
29117}
29118
29119/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
29120///
29121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxor&expand=3289)
29122#[inline]
29123#[target_feature(enable = "avx512f")]
29124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29125#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
29126pub fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
29127 a ^ b
29128}
29129
29130/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
29131///
29132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=knot_mask16&expand=3233)
29133#[inline]
29134#[target_feature(enable = "avx512f")]
29135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29136pub fn _knot_mask16(a: __mmask16) -> __mmask16 {
29137 a ^ 0b11111111_11111111
29138}
29139
29140/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
29141///
29142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_knot&expand=3231)
29143#[inline]
29144#[target_feature(enable = "avx512f")]
29145#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29146pub fn _mm512_knot(a: __mmask16) -> __mmask16 {
29147 a ^ 0b11111111_11111111
29148}
29149
29150/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
29151///
29152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kandn_mask16&expand=3218)
29153#[inline]
29154#[target_feature(enable = "avx512f")]
29155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29156#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
29157pub fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_kand(_mm512_knot(a), b)
29159}
29160
29161/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
29162///
29163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kandn&expand=3216)
29164#[inline]
29165#[target_feature(enable = "avx512f")]
29166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
29168pub fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_kand(_mm512_knot(a), b)
29170}
29171
29172/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
29173///
29174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxnor_mask16&expand=3285)
29175#[inline]
29176#[target_feature(enable = "avx512f")]
29177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29178#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
29179pub fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29180 _mm512_knot(_mm512_kxor(a, b))
29181}
29182
29183/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
29184///
29185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxnor&expand=3283)
29186#[inline]
29187#[target_feature(enable = "avx512f")]
29188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
29190pub fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
29191 _mm512_knot(_mm512_kxor(a, b))
29192}
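
// Illustrative sketch, not taken from this crate's test suite: the 16-bit mask
// helpers above are plain scalar boolean operations on a `u16`. The module and
// function names below are hypothetical; the harness usage mirrors the crate's own tests.
#[cfg(test)]
mod mask16_ops_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn mask16_boolean_identities() {
        let a = _cvtu32_mask16(0b1010_1010_1010_1010);
        let b = _cvtu32_mask16(0b0000_1111_0000_1111);
        // `_kandn_mask16` is `!a & b`; `_kxnor_mask16` is `!(a ^ b)`; `_knot_mask16` is `!a`.
        assert_eq!(_kandn_mask16(a, b), !a & b);
        assert_eq!(_kxnor_mask16(a, b), !(a ^ b));
        assert_eq!(_knot_mask16(a), !a);
        assert_eq!(_cvtmask16_u32(_kand_mask16(a, b)), 0b0000_1010_0000_1010);
    }
}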
29193
29194/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
29195/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
29196///
29197/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask16_u8)
29198#[inline]
29199#[target_feature(enable = "avx512f")]
29200#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29201pub unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
29202 let tmp: u16 = _kor_mask16(a, b);
29203 *all_ones = (tmp == 0xffff) as u8;
29204 (tmp == 0) as u8
29205}
29206
29207/// Compute the bitwise OR of 16-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
29208/// store 0 in dst.
29209///
29210/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask16_u8)
29211#[inline]
29212#[target_feature(enable = "avx512f")]
29213#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29214pub fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
29215 (_kor_mask16(a, b) == 0xffff) as u8
29216}
29217
29218/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
29219/// store 0 in dst.
29220///
29221/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask16_u8)
29222#[inline]
29223#[target_feature(enable = "avx512f")]
29224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29225pub fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
29226 (_kor_mask16(a, b) == 0) as u8
29227}
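
// Illustrative sketch, not taken from this crate's test suite: shows the three
// kortest reports on the same inputs. Names below are hypothetical.
#[cfg(test)]
mod kortest_mask16_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn kortest_reports_all_zeros_and_all_ones() {
        // 0x00ff | 0x0f00 is neither 0 nor 0xffff, so both reports are 0.
        assert_eq!(_kortestz_mask16_u8(0x00ff, 0x0f00), 0);
        assert_eq!(_kortestc_mask16_u8(0x00ff, 0x0f00), 0);
        // The two halves together cover every bit: the all-ones report is 1.
        assert_eq!(_kortestc_mask16_u8(0x00ff, 0xff00), 1);
        // Two empty masks OR to zero: the all-zeros report is 1.
        assert_eq!(_kortestz_mask16_u8(0, 0), 1);
        // `_kortest_mask16_u8` returns the all-zeros report and writes the
        // all-ones report through its pointer argument.
        let mut all_ones = 0u8;
        assert_eq!(_kortest_mask16_u8(0x00ff, 0xff00, &mut all_ones), 0);
        assert_eq!(all_ones, 1);
    }
}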
29228
29229/// Shift 16-bit mask a left by count bits while shifting in zeros, and store the result in dst.
29230///
29231/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask16)
29232#[inline]
29233#[target_feature(enable = "avx512f")]
29234#[rustc_legacy_const_generics(1)]
29235#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29236pub fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
29237 a << COUNT
29238}
29239
29240/// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst.
29241///
29242/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask16)
29243#[inline]
29244#[target_feature(enable = "avx512f")]
29245#[rustc_legacy_const_generics(1)]
29246#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29247pub fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
29248 a >> COUNT
29249}
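
// Illustrative sketch, not taken from this crate's test suite: the shift count is
// a const generic parameter, supplied here with turbofish syntax. Names below
// are hypothetical.
#[cfg(test)]
mod kshift_mask16_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn kshift_moves_mask_bits() {
        let a: __mmask16 = 0b0000_0000_1111_0000;
        // Shift left and right by four bit positions, filling with zeros.
        assert_eq!(_kshiftli_mask16::<4>(a), 0b0000_1111_0000_0000);
        assert_eq!(_kshiftri_mask16::<4>(a), 0b0000_0000_0000_1111);
    }
}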
29250
29251/// Load 16-bit mask from memory
29252///
29253/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask16)
29254#[inline]
29255#[target_feature(enable = "avx512f")]
29256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29257pub unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
29258 *mem_addr
29259}
29260
29261/// Store 16-bit mask to memory
29262///
29263/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask16)
29264#[inline]
29265#[target_feature(enable = "avx512f")]
29266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29267pub unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
29268 *mem_addr = a;
29269}
29270
29271/// Copy 16-bit mask a to k.
29272///
29273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
29274#[inline]
29275#[target_feature(enable = "avx512f")]
29276#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
29278pub fn _mm512_kmov(a: __mmask16) -> __mmask16 {
29279 a
29280}
29281
29282/// Converts integer mask into bitmask, storing the result in dst.
29283///
29284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_int2mask&expand=3189)
29285#[inline]
#[target_feature(enable = "avx512f")]
29287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29288pub fn _mm512_int2mask(mask: i32) -> __mmask16 {
29289 mask as u16
29290}
29291
29292/// Converts bit mask k1 into an integer value, storing the results in dst.
29293///
29294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2int&expand=3544)
29295#[inline]
29296#[target_feature(enable = "avx512f")]
29297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
29299pub fn _mm512_mask2int(k1: __mmask16) -> i32 {
29300 k1 as i32
29301}
29302
29303/// Unpack and interleave 8 bits from masks a and b, and store the 16-bit result in k.
29304///
29305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackb&expand=3280)
29306#[inline]
29307#[target_feature(enable = "avx512f")]
29308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kunpckbw
29310pub fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
29311 ((a & 0xff) << 8) | (b & 0xff)
29312}
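
// Illustrative sketch, not taken from this crate's test suite: `_mm512_kunpackb`
// keeps only the low byte of each mask, placing `a`'s byte in the upper half of
// the result. Names below are hypothetical.
#[cfg(test)]
mod kunpackb_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn kunpackb_packs_low_bytes() {
        let a: __mmask16 = 0xff01; // only the low byte (0x01) is kept
        let b: __mmask16 = 0xffab; // only the low byte (0xab) is kept
        assert_eq!(_mm512_kunpackb(a, b), 0x01ab);
    }
}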
29313
29314/// Performs bitwise OR between k1 and k2, storing the result in dst. CF flag is set if dst consists of all 1's.
29315///
29316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestc&expand=3247)
29317#[inline]
29318#[target_feature(enable = "avx512f")]
29319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(cmp))] // generate normal cmp code instead of kortestw
29321pub fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
29322 let r: bool = (a | b) == 0b11111111_11111111;
29323 r as i32
29324}
29325
29326/// Performs bitwise OR between k1 and k2, storing the result in dst. ZF flag is set if dst is 0.
29327///
29328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestz)
29329#[inline]
29330#[target_feature(enable = "avx512f")]
29331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kortestw
29333pub fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
29334 let r: bool = (a | b) == 0;
29335 r as i32
29336}
29337
29338/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29339///
29340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi32_mask&expand=5890)
29341#[inline]
29342#[target_feature(enable = "avx512f")]
29343#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29344#[cfg_attr(test, assert_instr(vptestmd))]
29345pub fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29346 let and: __m512i = _mm512_and_epi32(a, b);
29347 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmpneq_epi32_mask(and, zero)
29349}
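
// Illustrative sketch, not taken from this crate's test suite: the test-mask
// family sets a mask bit exactly where `a & b` is non-zero in that lane. Names
// below are hypothetical.
#[cfg(test)]
mod vptestmd_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn test_epi32_mask_flags_nonzero_lanes() {
        // Lanes 0 and 1 are the only lanes whose low bit is set.
        let a = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3);
        let b = _mm512_set1_epi32(1);
        assert_eq!(_mm512_test_epi32_mask(a, b), 0b11);
        // A writemask restricts which lanes may set their bit at all.
        assert_eq!(_mm512_mask_test_epi32_mask(0b01, a, b), 0b01);
    }
}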
29350
29351/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29352///
29353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi32_mask&expand=5889)
29354#[inline]
29355#[target_feature(enable = "avx512f")]
29356#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29357#[cfg_attr(test, assert_instr(vptestmd))]
29358pub fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29359 let and: __m512i = _mm512_and_epi32(a, b);
29360 let zero: __m512i = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi32_mask(k, and, zero)
29362}
29363
29364/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29365///
29366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi32_mask&expand=5888)
29367#[inline]
29368#[target_feature(enable = "avx512f,avx512vl")]
29369#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29370#[cfg_attr(test, assert_instr(vptestmd))]
29371pub fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29372 let and: __m256i = _mm256_and_si256(a, b);
29373 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmpneq_epi32_mask(and, zero)
29375}
29376
29377/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29378///
29379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi32_mask&expand=5887)
29380#[inline]
29381#[target_feature(enable = "avx512f,avx512vl")]
29382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29383#[cfg_attr(test, assert_instr(vptestmd))]
29384pub fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29385 let and: __m256i = _mm256_and_si256(a, b);
29386 let zero: __m256i = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi32_mask(k, and, zero)
29388}
29389
29390/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29391///
29392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi32_mask&expand=5886)
29393#[inline]
29394#[target_feature(enable = "avx512f,avx512vl")]
29395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29396#[cfg_attr(test, assert_instr(vptestmd))]
29397pub fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29398 let and: __m128i = _mm_and_si128(a, b);
29399 let zero: __m128i = _mm_setzero_si128();
    _mm_cmpneq_epi32_mask(and, zero)
29401}
29402
29403/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29404///
29405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi32_mask&expand=5885)
29406#[inline]
29407#[target_feature(enable = "avx512f,avx512vl")]
29408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29409#[cfg_attr(test, assert_instr(vptestmd))]
29410pub fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29411 let and: __m128i = _mm_and_si128(a, b);
29412 let zero: __m128i = _mm_setzero_si128();
    _mm_mask_cmpneq_epi32_mask(k, and, zero)
29414}
29415
29416/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29417///
29418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi64_mask&expand=5896)
29419#[inline]
29420#[target_feature(enable = "avx512f")]
29421#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29422#[cfg_attr(test, assert_instr(vptestmq))]
29423pub fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
29424 let and: __m512i = _mm512_and_epi64(a, b);
29425 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmpneq_epi64_mask(and, zero)
29427}
29428
29429/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29430///
29431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi64_mask&expand=5895)
29432#[inline]
29433#[target_feature(enable = "avx512f")]
29434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29435#[cfg_attr(test, assert_instr(vptestmq))]
29436pub fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
29437 let and: __m512i = _mm512_and_epi64(a, b);
29438 let zero: __m512i = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi64_mask(k, and, zero)
29440}
29441
29442/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29443///
29444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi64_mask&expand=5894)
29445#[inline]
29446#[target_feature(enable = "avx512f,avx512vl")]
29447#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29448#[cfg_attr(test, assert_instr(vptestmq))]
29449pub fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
29450 let and: __m256i = _mm256_and_si256(a, b);
29451 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmpneq_epi64_mask(and, zero)
29453}
29454
29455/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29456///
29457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi64_mask&expand=5893)
29458#[inline]
29459#[target_feature(enable = "avx512f,avx512vl")]
29460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29461#[cfg_attr(test, assert_instr(vptestmq))]
29462pub fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29463 let and: __m256i = _mm256_and_si256(a, b);
29464 let zero: __m256i = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi64_mask(k, and, zero)
29466}
29467
29468/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29469///
29470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi64_mask&expand=5892)
29471#[inline]
29472#[target_feature(enable = "avx512f,avx512vl")]
29473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29474#[cfg_attr(test, assert_instr(vptestmq))]
29475pub fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
29476 let and: __m128i = _mm_and_si128(a, b);
29477 let zero: __m128i = _mm_setzero_si128();
    _mm_cmpneq_epi64_mask(and, zero)
29479}
29480
29481/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29482///
29483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi64_mask&expand=5891)
29484#[inline]
29485#[target_feature(enable = "avx512f,avx512vl")]
29486#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29487#[cfg_attr(test, assert_instr(vptestmq))]
29488pub fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29489 let and: __m128i = _mm_and_si128(a, b);
29490 let zero: __m128i = _mm_setzero_si128();
    _mm_mask_cmpneq_epi64_mask(k, and, zero)
29492}
29493
29494/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29495///
29496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi32_mask&expand=5921)
29497#[inline]
29498#[target_feature(enable = "avx512f")]
29499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29500#[cfg_attr(test, assert_instr(vptestnmd))]
29501pub fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29502 let and: __m512i = _mm512_and_epi32(a, b);
29503 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmpeq_epi32_mask(and, zero)
29505}
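
// Illustrative sketch, not taken from this crate's test suite: for the same
// inputs, testn is the bitwise complement of test, since it flags lanes whose
// `a & b` is zero. Names below are hypothetical.
#[cfg(test)]
mod vptestnmd_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn testn_is_complement_of_test() {
        let a = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3);
        let b = _mm512_set1_epi32(1);
        let t = _mm512_test_epi32_mask(a, b);
        let n = _mm512_testn_epi32_mask(a, b);
        assert_eq!(n, !t);
        assert_eq!(t & n, 0);
    }
}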
29506
29507/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29508///
29509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi32_mask&expand=5920)
29510#[inline]
29511#[target_feature(enable = "avx512f")]
29512#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29513#[cfg_attr(test, assert_instr(vptestnmd))]
29514pub fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29515 let and: __m512i = _mm512_and_epi32(a, b);
29516 let zero: __m512i = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi32_mask(k, and, zero)
29518}
29519
29520/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29521///
29522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi32_mask&expand=5919)
29523#[inline]
29524#[target_feature(enable = "avx512f,avx512vl")]
29525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29526#[cfg_attr(test, assert_instr(vptestnmd))]
29527pub fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29528 let and: __m256i = _mm256_and_si256(a, b);
29529 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmpeq_epi32_mask(and, zero)
29531}
29532
29533/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29534///
29535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi32_mask&expand=5918)
29536#[inline]
29537#[target_feature(enable = "avx512f,avx512vl")]
29538#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29539#[cfg_attr(test, assert_instr(vptestnmd))]
29540pub fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29541 let and: __m256i = _mm256_and_si256(a, b);
29542 let zero: __m256i = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi32_mask(k, and, zero)
29544}
29545
29546/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29547///
29548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi32_mask&expand=5917)
29549#[inline]
29550#[target_feature(enable = "avx512f,avx512vl")]
29551#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29552#[cfg_attr(test, assert_instr(vptestnmd))]
29553pub fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29554 let and: __m128i = _mm_and_si128(a, b);
29555 let zero: __m128i = _mm_setzero_si128();
    _mm_cmpeq_epi32_mask(and, zero)
29557}
29558
29559/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29560///
29561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi32_mask&expand=5916)
29562#[inline]
29563#[target_feature(enable = "avx512f,avx512vl")]
29564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29565#[cfg_attr(test, assert_instr(vptestnmd))]
29566pub fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29567 let and: __m128i = _mm_and_si128(a, b);
29568 let zero: __m128i = _mm_setzero_si128();
    _mm_mask_cmpeq_epi32_mask(k, and, zero)
29570}
29571
29572/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29573///
29574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi64_mask&expand=5927)
29575#[inline]
29576#[target_feature(enable = "avx512f")]
29577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29578#[cfg_attr(test, assert_instr(vptestnmq))]
29579pub fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
29580 let and: __m512i = _mm512_and_epi64(a, b);
29581 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmpeq_epi64_mask(and, zero)
29583}
29584
29585/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29586///
29587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi64_mask&expand=5926)
29588#[inline]
29589#[target_feature(enable = "avx512f")]
29590#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29591#[cfg_attr(test, assert_instr(vptestnmq))]
29592pub fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
29593 let and: __m512i = _mm512_and_epi64(a, b);
29594 let zero: __m512i = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi64_mask(k, and, zero)
29596}
29597
29598/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29599///
29600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi64_mask&expand=5925)
29601#[inline]
29602#[target_feature(enable = "avx512f,avx512vl")]
29603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29604#[cfg_attr(test, assert_instr(vptestnmq))]
29605pub fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
29606 let and: __m256i = _mm256_and_si256(a, b);
29607 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmpeq_epi64_mask(and, zero)
29609}
29610
29611/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29612///
29613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi64_mask&expand=5924)
29614#[inline]
29615#[target_feature(enable = "avx512f,avx512vl")]
29616#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29617#[cfg_attr(test, assert_instr(vptestnmq))]
29618pub fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29619 let and: __m256i = _mm256_and_si256(a, b);
29620 let zero: __m256i = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi64_mask(k, and, zero)
29622}
29623
29624/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29625///
29626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi64_mask&expand=5923)
29627#[inline]
29628#[target_feature(enable = "avx512f,avx512vl")]
29629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29630#[cfg_attr(test, assert_instr(vptestnmq))]
29631pub fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
29632 let and: __m128i = _mm_and_si128(a, b);
29633 let zero: __m128i = _mm_setzero_si128();
    _mm_cmpeq_epi64_mask(and, zero)
29635}
29636
29637/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29638///
29639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi64_mask&expand=5922)
29640#[inline]
29641#[target_feature(enable = "avx512f,avx512vl")]
29642#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29643#[cfg_attr(test, assert_instr(vptestnmq))]
29644pub fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29645 let and: __m128i = _mm_and_si128(a, b);
29646 let zero: __m128i = _mm_setzero_si128();
    _mm_mask_cmpeq_epi64_mask(k, and, zero)
29648}
29649
29650/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29651///
29652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
29653///
29654/// # Safety of non-temporal stores
29655///
29656/// After using this intrinsic, but before any other access to the memory that this intrinsic
29657/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29658/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29659/// return.
29660///
29661/// See [`_mm_sfence`] for details.
29662#[inline]
29663#[target_feature(enable = "avx512f")]
29664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29665#[cfg_attr(test, assert_instr(vmovntps))]
29666#[allow(clippy::cast_ptr_alignment)]
29667pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
29668 crate::arch::asm!(
29669 vps!("vmovntps", ",{a}"),
29670 p = in(reg) mem_addr,
29671 a = in(zmm_reg) a,
29672 options(nostack, preserves_flags),
29673 );
29674}
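
// Illustrative sketch, not taken from this crate's test suite: a non-temporal
// store into 64-byte-aligned memory, fenced with `_mm_sfence` before the buffer
// is read again, as the safety section above requires. Names below are hypothetical.
#[cfg(test)]
mod stream_ps_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn stream_ps_then_sfence() {
        #[repr(align(64))]
        struct Aligned([f32; 16]);

        let mut buf = Aligned([0.0; 16]);
        _mm512_stream_ps(buf.0.as_mut_ptr(), _mm512_set1_ps(7.0));
        // Fence the non-temporal store before any other access to `buf`.
        _mm_sfence();
        assert_eq!(buf.0, [7.0; 16]);
    }
}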
29675
29676/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29677///
29678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
29679///
29680/// # Safety of non-temporal stores
29681///
29682/// After using this intrinsic, but before any other access to the memory that this intrinsic
29683/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29684/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29685/// return.
29686///
29687/// See [`_mm_sfence`] for details.
29688#[inline]
29689#[target_feature(enable = "avx512f")]
29690#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29691#[cfg_attr(test, assert_instr(vmovntpd))]
29692#[allow(clippy::cast_ptr_alignment)]
29693pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
29694 crate::arch::asm!(
29695 vps!("vmovntpd", ",{a}"),
29696 p = in(reg) mem_addr,
29697 a = in(zmm_reg) a,
29698 options(nostack, preserves_flags),
29699 );
29700}
29701
29702/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29703///
29704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
29705///
29706/// # Safety of non-temporal stores
29707///
29708/// After using this intrinsic, but before any other access to the memory that this intrinsic
29709/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29710/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29711/// return.
29712///
29713/// See [`_mm_sfence`] for details.
29714#[inline]
29715#[target_feature(enable = "avx512f")]
29716#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29717#[cfg_attr(test, assert_instr(vmovntdq))]
29718#[allow(clippy::cast_ptr_alignment)]
29719pub unsafe fn _mm512_stream_si512(mem_addr: *mut __m512i, a: __m512i) {
29720 crate::arch::asm!(
29721 vps!("vmovntdq", ",{a}"),
29722 p = in(reg) mem_addr,
29723 a = in(zmm_reg) a,
29724 options(nostack, preserves_flags),
29725 );
29726}
29727
29728/// Load 512-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
29729/// must be aligned on a 64-byte boundary or a general-protection exception may be generated. To
/// minimize caching, the data is flagged as non-temporal (unlikely to be used again soon).
29731///
29732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_load_si512)
29733#[inline]
29734#[target_feature(enable = "avx512f")]
29735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29736pub unsafe fn _mm512_stream_load_si512(mem_addr: *const __m512i) -> __m512i {
29737 let dst: __m512i;
29738 crate::arch::asm!(
29739 vpl!("vmovntdqa {a}"),
29740 a = out(zmm_reg) dst,
29741 p = in(reg) mem_addr,
29742 options(pure, readonly, nostack, preserves_flags),
29743 );
29744 dst
29745}
29746
/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_ps&expand=4931)
29750#[inline]
29751#[target_feature(enable = "avx512f")]
29752#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29753pub fn _mm512_set_ps(
29754 e0: f32,
29755 e1: f32,
29756 e2: f32,
29757 e3: f32,
29758 e4: f32,
29759 e5: f32,
29760 e6: f32,
29761 e7: f32,
29762 e8: f32,
29763 e9: f32,
29764 e10: f32,
29765 e11: f32,
29766 e12: f32,
29767 e13: f32,
29768 e14: f32,
29769 e15: f32,
29770) -> __m512 {
29771 _mm512_setr_ps(
        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
29773 )
29774}
29775
/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied
/// values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_ps&expand=5008)
29780#[inline]
29781#[target_feature(enable = "avx512f")]
29782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29783pub fn _mm512_setr_ps(
29784 e0: f32,
29785 e1: f32,
29786 e2: f32,
29787 e3: f32,
29788 e4: f32,
29789 e5: f32,
29790 e6: f32,
29791 e7: f32,
29792 e8: f32,
29793 e9: f32,
29794 e10: f32,
29795 e11: f32,
29796 e12: f32,
29797 e13: f32,
29798 e14: f32,
29799 e15: f32,
29800) -> __m512 {
29801 unsafe {
29802 let r: f32x16 = f32x16::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
        );
        transmute(r)
29806 }
29807}
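
// Illustrative sketch, not taken from this crate's test suite: `_mm512_set_ps`
// takes elements from the highest lane down to lane 0, while `_mm512_setr_ps`
// takes them in lane order, so these two calls build the same vector. Names
// below are hypothetical.
#[cfg(test)]
mod set_ps_order_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn set_ps_and_setr_ps_agree_when_reversed() {
        let hi_first = _mm512_set_ps(
            15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0,
        );
        let lo_first = _mm512_setr_ps(
            0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
        );
        assert_eq!(_mm512_cmpeq_ps_mask(hi_first, lo_first), 0xffff);
    }
}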
29808
29809/// Broadcast 64-bit float `a` to all elements of `dst`.
29810///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_pd&expand=4975)
29812#[inline]
29813#[target_feature(enable = "avx512f")]
29814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29815pub fn _mm512_set1_pd(a: f64) -> __m512d {
    unsafe { transmute(f64x8::splat(a)) }
29817}
29818
29819/// Broadcast 32-bit float `a` to all elements of `dst`.
29820///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_ps&expand=4981)
29822#[inline]
29823#[target_feature(enable = "avx512f")]
29824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29825pub fn _mm512_set1_ps(a: f32) -> __m512 {
    unsafe { transmute(f32x16::splat(a)) }
29827}
29828
29829/// Sets packed 32-bit integers in `dst` with the supplied values.
29830///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi32&expand=4908)
29832#[inline]
29833#[target_feature(enable = "avx512f")]
29834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29835pub fn _mm512_set_epi32(
29836 e15: i32,
29837 e14: i32,
29838 e13: i32,
29839 e12: i32,
29840 e11: i32,
29841 e10: i32,
29842 e9: i32,
29843 e8: i32,
29844 e7: i32,
29845 e6: i32,
29846 e5: i32,
29847 e4: i32,
29848 e3: i32,
29849 e2: i32,
29850 e1: i32,
29851 e0: i32,
29852) -> __m512i {
29853 _mm512_setr_epi32(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
29855 )
29856}
29857
29858/// Broadcast 8-bit integer a to all elements of dst.
29859///
29860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi8&expand=4972)
29861#[inline]
29862#[target_feature(enable = "avx512f")]
29863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29864pub fn _mm512_set1_epi8(a: i8) -> __m512i {
    unsafe { transmute(i8x64::splat(a)) }
29866}
29867
/// Broadcast 16-bit integer a to all elements of dst.
29869///
29870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi16&expand=4944)
29871#[inline]
29872#[target_feature(enable = "avx512f")]
29873#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29874pub fn _mm512_set1_epi16(a: i16) -> __m512i {
    unsafe { transmute(i16x32::splat(a)) }
29876}
29877
29878/// Broadcast 32-bit integer `a` to all elements of `dst`.
29879///
29880/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi32)
29881#[inline]
29882#[target_feature(enable = "avx512f")]
29883#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29884pub fn _mm512_set1_epi32(a: i32) -> __m512i {
    unsafe { transmute(i32x16::splat(a)) }
29886}
29887
29888/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29889///
29890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi32&expand=4951)
29891#[inline]
29892#[target_feature(enable = "avx512f")]
29893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29894#[cfg_attr(test, assert_instr(vpbroadcastd))]
29895pub fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
29896 unsafe {
29897 let r: i32x16 = _mm512_set1_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
29899 }
29900}
29901
29902/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29903///
29904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi32&expand=4952)
29905#[inline]
29906#[target_feature(enable = "avx512f")]
29907#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29908#[cfg_attr(test, assert_instr(vpbroadcastd))]
29909pub fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
29910 unsafe {
29911 let r: i32x16 = _mm512_set1_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
29913 }
29914}
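
// Illustrative sketch, not taken from this crate's test suite: with the same
// mask, the writemask variant keeps `src` in unselected lanes while the
// zeromask variant zeroes them. Names below are hypothetical.
#[cfg(test)]
mod set1_epi32_mask_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn mask_and_maskz_set1_epi32() {
        let src = _mm512_set1_epi32(-1);
        let k: __mmask16 = 0b11; // broadcast only into lanes 0 and 1
        let w = _mm512_mask_set1_epi32(src, k, 7);
        let z = _mm512_maskz_set1_epi32(k, 7);
        let e_w = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, 7);
        let e_z = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7);
        assert_eq!(_mm512_cmpeq_epi32_mask(w, e_w), 0xffff);
        assert_eq!(_mm512_cmpeq_epi32_mask(z, e_z), 0xffff);
    }
}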
29915
29916/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29917///
29918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi32&expand=4948)
29919#[inline]
29920#[target_feature(enable = "avx512f,avx512vl")]
29921#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29922#[cfg_attr(test, assert_instr(vpbroadcastd))]
29923pub fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
29924 unsafe {
29925 let r: i32x8 = _mm256_set1_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
29927 }
29928}
29929
29930/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29931///
29932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi32&expand=4949)
29933#[inline]
29934#[target_feature(enable = "avx512f,avx512vl")]
29935#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29936#[cfg_attr(test, assert_instr(vpbroadcastd))]
29937pub fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
29938 unsafe {
29939 let r: i32x8 = _mm256_set1_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
29941 }
29942}
29943
29944/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29945///
29946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi32&expand=4945)
29947#[inline]
29948#[target_feature(enable = "avx512f,avx512vl")]
29949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29950#[cfg_attr(test, assert_instr(vpbroadcastd))]
29951pub fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
29952 unsafe {
29953 let r: i32x4 = _mm_set1_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
29955 }
29956}
29957
29958/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29959///
29960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi32&expand=4946)
29961#[inline]
29962#[target_feature(enable = "avx512f,avx512vl")]
29963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29964#[cfg_attr(test, assert_instr(vpbroadcastd))]
29965pub fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
29966 unsafe {
29967 let r: i32x4 = _mm_set1_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
29969 }
29970}
29971
29972/// Broadcast 64-bit integer `a` to all elements of `dst`.
29973///
29974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi64&expand=4961)
29975#[inline]
29976#[target_feature(enable = "avx512f")]
29977#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29978pub fn _mm512_set1_epi64(a: i64) -> __m512i {
    unsafe { transmute(i64x8::splat(a)) }
29980}
29981
29982/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29983///
29984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi64&expand=4959)
29985#[inline]
29986#[target_feature(enable = "avx512f")]
29987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29988#[cfg_attr(test, assert_instr(vpbroadcastq))]
29989pub fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
29990 unsafe {
29991 let r: i64x8 = _mm512_set1_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
29993 }
29994}
29995
29996/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29997///
29998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi64&expand=4960)
29999#[inline]
30000#[target_feature(enable = "avx512f")]
30001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30002#[cfg_attr(test, assert_instr(vpbroadcastq))]
30003pub fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
30004 unsafe {
30005 let r: i64x8 = _mm512_set1_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
30007 }
30008}
30009
30010/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30011///
30012/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi64&expand=4957)
30013#[inline]
30014#[target_feature(enable = "avx512f,avx512vl")]
30015#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30016#[cfg_attr(test, assert_instr(vpbroadcastq))]
30017pub fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
30018 unsafe {
30019 let r: i64x4 = _mm256_set1_epi64x(a).as_i64x4();
        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
30021 }
30022}
30023
30024/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30025///
30026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi64&expand=4958)
30027#[inline]
30028#[target_feature(enable = "avx512f,avx512vl")]
30029#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30030#[cfg_attr(test, assert_instr(vpbroadcastq))]
30031pub fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
30032 unsafe {
30033 let r: i64x4 = _mm256_set1_epi64x(a).as_i64x4();
        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
30035 }
30036}
30037
30038/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30039///
30040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi64&expand=4954)
30041#[inline]
30042#[target_feature(enable = "avx512f,avx512vl")]
30043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30044#[cfg_attr(test, assert_instr(vpbroadcastq))]
30045pub fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
30046 unsafe {
30047 let r: i64x2 = _mm_set1_epi64x(a).as_i64x2();
        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
30049 }
30050}
30051
30052/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30053///
30054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi64&expand=4955)
30055#[inline]
30056#[target_feature(enable = "avx512f,avx512vl")]
30057#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30058#[cfg_attr(test, assert_instr(vpbroadcastq))]
30059pub fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
30060 unsafe {
30061 let r: i64x2 = _mm_set1_epi64x(a).as_i64x2();
        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
30063 }
30064}
30065
30066/// Set packed 64-bit integers in dst with the repeated 4 element sequence.
30067///
30068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi64&expand=4983)
30069#[inline]
30070#[target_feature(enable = "avx512f")]
30071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30072pub fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
    _mm512_set_epi64(d, c, b, a, d, c, b, a)
30074}
30075
30076/// Set packed 64-bit integers in dst with the repeated 4 element sequence in reverse order.
30077///
30078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi64&expand=5010)
30079#[inline]
30080#[target_feature(enable = "avx512f")]
30081#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30082pub fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
    _mm512_set_epi64(a, b, c, d, a, b, c, d)
30084}
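
// A minimal illustrative sketch, not part of the upstream source: it spells out the
// lane ordering produced by the repeated-sequence constructors above, assuming an
// AVX-512F target. The helper name `sketch_set4_epi64_ordering` is invented here.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_set4_epi64_ordering() {
    let (a, b, c, d) = (1i64, 2, 3, 4);
    // `_mm512_set4_epi64(d, c, b, a)` places `a` in the lowest lane, i.e. it matches
    // `_mm512_setr_epi64(a, b, c, d, a, b, c, d)`.
    let set4 = _mm512_set4_epi64(d, c, b, a);
    assert_eq!(
        _mm512_cmpeq_epi64_mask(set4, _mm512_setr_epi64(a, b, c, d, a, b, c, d)),
        0xff
    );
    // The reversed variant places `d` in the lowest lane instead.
    let setr4 = _mm512_setr4_epi64(d, c, b, a);
    assert_eq!(
        _mm512_cmpeq_epi64_mask(setr4, _mm512_setr_epi64(d, c, b, a, d, c, b, a)),
        0xff
    );
}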
30085
30086/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
30087///
30088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_ps_mask&expand=1074)
30089#[inline]
30090#[target_feature(enable = "avx512f")]
30091#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30092#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30093pub fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30094 _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b)
30095}
30096
30097/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30098///
30099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_ps_mask&expand=1075)
30100#[inline]
30101#[target_feature(enable = "avx512f")]
30102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30103#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30104pub fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30105 _mm512_mask_cmp_ps_mask::<_CMP_LT_OS>(k1, a, b)
30106}
30107
30108/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
30109///
30110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_ps_mask&expand=1154)
30111#[inline]
30112#[target_feature(enable = "avx512f")]
30113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30114#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30115pub fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30116 _mm512_cmp_ps_mask::<_CMP_NLT_US>(a, b)
30117}
30118
30119/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30120///
30121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_ps_mask&expand=1155)
30122#[inline]
30123#[target_feature(enable = "avx512f")]
30124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30125#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30126pub fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30127 _mm512_mask_cmp_ps_mask::<_CMP_NLT_US>(k1, a, b)
30128}
30129
30130/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
30131///
30132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_ps_mask&expand=1013)
30133#[inline]
30134#[target_feature(enable = "avx512f")]
30135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30136#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30137pub fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30138 _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b)
30139}
30140
30141/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30142///
30143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_ps_mask&expand=1014)
30144#[inline]
30145#[target_feature(enable = "avx512f")]
30146#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30147#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30148pub fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30149 _mm512_mask_cmp_ps_mask::<_CMP_LE_OS>(k1, a, b)
30150}
30151
30152/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
30153///
30154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_ps_mask&expand=1146)
30155#[inline]
30156#[target_feature(enable = "avx512f")]
30157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30158#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30159pub fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30160 _mm512_cmp_ps_mask::<_CMP_NLE_US>(a, b)
30161}
30162
30163/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30164///
30165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_ps_mask&expand=1147)
30166#[inline]
30167#[target_feature(enable = "avx512f")]
30168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30169#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30170pub fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30171 _mm512_mask_cmp_ps_mask::<_CMP_NLE_US>(k1, a, b)
30172}
30173
30174/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
30175///
30176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_ps_mask&expand=828)
30177#[inline]
30178#[target_feature(enable = "avx512f")]
30179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30180#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30181pub fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30182 _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b)
30183}
30184
30185/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30186///
30187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_ps_mask&expand=829)
30188#[inline]
30189#[target_feature(enable = "avx512f")]
30190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30191#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30192pub fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30193 _mm512_mask_cmp_ps_mask::<_CMP_EQ_OQ>(k1, a, b)
30194}
30195
30196/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
30197///
30198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_ps_mask&expand=1130)
30199#[inline]
30200#[target_feature(enable = "avx512f")]
30201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30202#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30203pub fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30204 _mm512_cmp_ps_mask::<_CMP_NEQ_UQ>(a, b)
30205}
30206
30207/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30208///
30209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_ps_mask&expand=1131)
30210#[inline]
30211#[target_feature(enable = "avx512f")]
30212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30213#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30214pub fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30215 _mm512_mask_cmp_ps_mask::<_CMP_NEQ_UQ>(k1, a, b)
30216}
30217
30218/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30219///
30220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_ps_mask&expand=749)
30221#[inline]
30222#[target_feature(enable = "avx512f")]
30223#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30224#[rustc_legacy_const_generics(2)]
30225#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30226pub fn _mm512_cmp_ps_mask<const IMM8: i32>(a: __m512, b: __m512) -> __mmask16 {
30227 unsafe {
30228 static_assert_uimm_bits!(IMM8, 5);
30229 let neg_one: i16 = -1;
30230 let a: f32x16 = a.as_f32x16();
30231 let b: f32x16 = b.as_f32x16();
        let r: i16 = vcmpps(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30233 r.cast_unsigned()
30234 }
30235}
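
// A minimal illustrative sketch, not part of the upstream source: it drives the
// generic predicate form above with an explicit `_CMP_*` constant and checks that it
// agrees with the named shorthand defined earlier, assuming an AVX-512F target. The
// helper name `sketch_cmp_ps_predicate` is invented here.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_cmp_ps_predicate() {
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_setr_ps(
        0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0,
    );
    // `a[i] <= b[i]` holds in every lane except those where `b` is 0.0.
    let le: __mmask16 = _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b);
    assert_eq!(le, 0b1110_1110_1110_1110);
    // The named shorthand produces the same mask.
    assert_eq!(_mm512_cmple_ps_mask(a, b), le);
}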
30236
30237/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30238///
30239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_ps_mask&expand=750)
30240#[inline]
30241#[target_feature(enable = "avx512f")]
30242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30243#[rustc_legacy_const_generics(3)]
30244#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30245pub fn _mm512_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30246 unsafe {
30247 static_assert_uimm_bits!(IMM8, 5);
30248 let a: f32x16 = a.as_f32x16();
30249 let b: f32x16 = b.as_f32x16();
        let r: i16 = vcmpps(a, b, IMM8, k1 as i16, _MM_FROUND_CUR_DIRECTION);
30251 r.cast_unsigned()
30252 }
30253}
30254
30255/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30256///
30257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps_mask&expand=747)
30258#[inline]
30259#[target_feature(enable = "avx512f,avx512vl")]
30260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30261#[rustc_legacy_const_generics(2)]
30262#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30263pub fn _mm256_cmp_ps_mask<const IMM8: i32>(a: __m256, b: __m256) -> __mmask8 {
30264 unsafe {
30265 static_assert_uimm_bits!(IMM8, 5);
30266 let neg_one: i8 = -1;
30267 let a: f32x8 = a.as_f32x8();
30268 let b: f32x8 = b.as_f32x8();
        let r: i8 = vcmpps256(a, b, IMM8, neg_one);
30270 r.cast_unsigned()
30271 }
30272}
30273
30274/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30275///
30276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_ps_mask&expand=748)
30277#[inline]
30278#[target_feature(enable = "avx512f,avx512vl")]
30279#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30280#[rustc_legacy_const_generics(3)]
30281#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30282pub fn _mm256_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256, b: __m256) -> __mmask8 {
30283 unsafe {
30284 static_assert_uimm_bits!(IMM8, 5);
30285 let a: f32x8 = a.as_f32x8();
30286 let b: f32x8 = b.as_f32x8();
        let r: i8 = vcmpps256(a, b, IMM8, k1 as i8);
30288 r.cast_unsigned()
30289 }
30290}
30291
30292/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30293///
30294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps_mask&expand=745)
30295#[inline]
30296#[target_feature(enable = "avx512f,avx512vl")]
30297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30298#[rustc_legacy_const_generics(2)]
30299#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30300pub fn _mm_cmp_ps_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
30301 unsafe {
30302 static_assert_uimm_bits!(IMM8, 5);
30303 let neg_one: i8 = -1;
30304 let a: f32x4 = a.as_f32x4();
30305 let b: f32x4 = b.as_f32x4();
        let r: i8 = vcmpps128(a, b, IMM8, neg_one);
30307 r.cast_unsigned()
30308 }
30309}
30310
30311/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30312///
30313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ps_mask&expand=746)
30314#[inline]
30315#[target_feature(enable = "avx512f,avx512vl")]
30316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30317#[rustc_legacy_const_generics(3)]
30318#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30319pub fn _mm_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
30320 unsafe {
30321 static_assert_uimm_bits!(IMM8, 5);
30322 let a: f32x4 = a.as_f32x4();
30323 let b: f32x4 = b.as_f32x4();
        let r: i8 = vcmpps128(a, b, IMM8, k1 as i8);
30325 r.cast_unsigned()
30326 }
30327}
30328
30329/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
30330/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30331///
30332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_ps_mask&expand=753)
30333#[inline]
30334#[target_feature(enable = "avx512f")]
30335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30336#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30337#[rustc_legacy_const_generics(2, 3)]
30338pub fn _mm512_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
30339 a: __m512,
30340 b: __m512,
30341) -> __mmask16 {
30342 unsafe {
30343 static_assert_uimm_bits!(IMM5, 5);
30344 static_assert_mantissas_sae!(SAE);
30345 let neg_one: i16 = -1;
30346 let a: f32x16 = a.as_f32x16();
30347 let b: f32x16 = b.as_f32x16();
        let r: i16 = vcmpps(a, b, IMM5, neg_one, SAE);
30349 r.cast_unsigned()
30350 }
30351}
30352
30353/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
30354/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30355///
30356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_ps_mask&expand=754)
30357#[inline]
30358#[target_feature(enable = "avx512f")]
30359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30360#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30361#[rustc_legacy_const_generics(3, 4)]
30362pub fn _mm512_mask_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
30363 m: __mmask16,
30364 a: __m512,
30365 b: __m512,
30366) -> __mmask16 {
30367 unsafe {
30368 static_assert_uimm_bits!(IMM5, 5);
30369 static_assert_mantissas_sae!(SAE);
30370 let a: f32x16 = a.as_f32x16();
30371 let b: f32x16 = b.as_f32x16();
30372 let r: i16 = vcmpps(a, b, IMM5, m as i16, SAE);
30373 r.cast_unsigned()
30374 }
30375}
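
// A minimal illustrative sketch, not part of the upstream source: it invokes the SAE
// form above with `_MM_FROUND_NO_EXC` so the comparison does not raise floating-point
// exceptions on NaN inputs, assuming an AVX-512F target. The helper name
// `sketch_cmp_round_ps_sae` is invented here.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_cmp_round_ps_sae() {
    let a = _mm512_set1_ps(f32::NAN);
    let b = _mm512_set1_ps(1.0);
    // An ordered less-than against NaN is false in every lane, so the mask is 0.
    let k = _mm512_cmp_round_ps_mask::<_CMP_LT_OS, _MM_FROUND_NO_EXC>(a, b);
    assert_eq!(k, 0);
}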
30376
30377/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
30378///
30379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_ps_mask&expand=1162)
30380#[inline]
30381#[target_feature(enable = "avx512f")]
30382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30384pub fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30385 _mm512_cmp_ps_mask::<_CMP_ORD_Q>(a, b)
30386}
30387
30388/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30389///
30390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_ps_mask&expand=1163)
30391#[inline]
30392#[target_feature(enable = "avx512f")]
30393#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30394#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30395pub fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30396 _mm512_mask_cmp_ps_mask::<_CMP_ORD_Q>(k1, a, b)
30397}
30398
30399/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
30400///
30401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_ps_mask&expand=1170)
30402#[inline]
30403#[target_feature(enable = "avx512f")]
30404#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30405#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30406pub fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30407 _mm512_cmp_ps_mask::<_CMP_UNORD_Q>(a, b)
30408}
30409
30410/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30411///
30412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_ps_mask&expand=1171)
30413#[inline]
30414#[target_feature(enable = "avx512f")]
30415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30416#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30417pub fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30418 _mm512_mask_cmp_ps_mask::<_CMP_UNORD_Q>(k1, a, b)
30419}
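
// A minimal illustrative sketch, not part of the upstream source: it contrasts the
// ordered and unordered checks above in the presence of NaNs, assuming an AVX-512F
// target. The helper name `sketch_ord_unord_ps` is invented here.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_ord_unord_ps() {
    let a = _mm512_setr_ps(
        f32::NAN, 1.0, f32::NAN, 1.0, f32::NAN, 1.0, f32::NAN, 1.0, f32::NAN, 1.0,
        f32::NAN, 1.0, f32::NAN, 1.0, f32::NAN, 1.0,
    );
    let b = _mm512_set1_ps(2.0);
    // Every even lane holds a NaN, so it is unordered; every odd lane is ordered.
    let ord = _mm512_cmpord_ps_mask(a, b);
    let unord = _mm512_cmpunord_ps_mask(a, b);
    assert_eq!(ord, 0b1010_1010_1010_1010);
    // The two masks are complementary: each lane is either ordered or unordered.
    assert_eq!(unord, !ord);
}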
30420
30421/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
30422///
30423/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_pd_mask&expand=1071)
30424#[inline]
30425#[target_feature(enable = "avx512f")]
30426#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30427#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30428pub fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30429 _mm512_cmp_pd_mask::<_CMP_LT_OS>(a, b)
30430}
30431
30432/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30433///
30434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_pd_mask&expand=1072)
30435#[inline]
30436#[target_feature(enable = "avx512f")]
30437#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30438#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30439pub fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30440 _mm512_mask_cmp_pd_mask::<_CMP_LT_OS>(k1, a, b)
30441}
30442
30443/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
30444///
30445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_pd_mask&expand=1151)
30446#[inline]
30447#[target_feature(enable = "avx512f")]
30448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30449#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30450pub fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30451 _mm512_cmp_pd_mask::<_CMP_NLT_US>(a, b)
30452}
30453
30454/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30455///
30456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_pd_mask&expand=1152)
30457#[inline]
30458#[target_feature(enable = "avx512f")]
30459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30460#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30461pub fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_NLT_US>(m, a, b)
30463}
30464
30465/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
30466///
30467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_pd_mask&expand=1010)
30468#[inline]
30469#[target_feature(enable = "avx512f")]
30470#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30471#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30472pub fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30473 _mm512_cmp_pd_mask::<_CMP_LE_OS>(a, b)
30474}
30475
30476/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30477///
30478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_pd_mask&expand=1011)
30479#[inline]
30480#[target_feature(enable = "avx512f")]
30481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30482#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30483pub fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30484 _mm512_mask_cmp_pd_mask::<_CMP_LE_OS>(k1, a, b)
30485}
30486
30487/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
30488///
30489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_pd_mask&expand=1143)
30490#[inline]
30491#[target_feature(enable = "avx512f")]
30492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30493#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30494pub fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30495 _mm512_cmp_pd_mask::<_CMP_NLE_US>(a, b)
30496}
30497
30498/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30499///
30500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_pd_mask&expand=1144)
30501#[inline]
30502#[target_feature(enable = "avx512f")]
30503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30504#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30505pub fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30506 _mm512_mask_cmp_pd_mask::<_CMP_NLE_US>(k1, a, b)
30507}
30508
30509/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
30510///
30511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_pd_mask&expand=822)
30512#[inline]
30513#[target_feature(enable = "avx512f")]
30514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30515#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30516pub fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30517 _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b)
30518}
30519
30520/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30521///
30522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_pd_mask&expand=823)
30523#[inline]
30524#[target_feature(enable = "avx512f")]
30525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30526#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30527pub fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30528 _mm512_mask_cmp_pd_mask::<_CMP_EQ_OQ>(k1, a, b)
30529}
30530
30531/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
30532///
30533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_pd_mask&expand=1127)
30534#[inline]
30535#[target_feature(enable = "avx512f")]
30536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30537#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30538pub fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30539 _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b)
30540}
30541
30542/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30543///
30544/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_pd_mask&expand=1128)
30545#[inline]
30546#[target_feature(enable = "avx512f")]
30547#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30548#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30549pub fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30550 _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b)
30551}
30552
30553/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30554///
30555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_pd_mask&expand=741)
30556#[inline]
30557#[target_feature(enable = "avx512f")]
30558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30559#[rustc_legacy_const_generics(2)]
30560#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30561pub fn _mm512_cmp_pd_mask<const IMM8: i32>(a: __m512d, b: __m512d) -> __mmask8 {
30562 unsafe {
30563 static_assert_uimm_bits!(IMM8, 5);
30564 let neg_one: i8 = -1;
30565 let a: f64x8 = a.as_f64x8();
30566 let b: f64x8 = b.as_f64x8();
        let r: i8 = vcmppd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30568 r.cast_unsigned()
30569 }
30570}
30571
30572/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30573///
30574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_pd_mask&expand=742)
30575#[inline]
30576#[target_feature(enable = "avx512f")]
30577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30578#[rustc_legacy_const_generics(3)]
30579#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30580pub fn _mm512_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30581 unsafe {
30582 static_assert_uimm_bits!(IMM8, 5);
30583 let a: f64x8 = a.as_f64x8();
30584 let b: f64x8 = b.as_f64x8();
        let r: i8 = vcmppd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30586 r.cast_unsigned()
30587 }
30588}
30589
30590/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30591///
30592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd_mask&expand=739)
30593#[inline]
30594#[target_feature(enable = "avx512f,avx512vl")]
30595#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30596#[rustc_legacy_const_generics(2)]
30597#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30598pub fn _mm256_cmp_pd_mask<const IMM8: i32>(a: __m256d, b: __m256d) -> __mmask8 {
30599 unsafe {
30600 static_assert_uimm_bits!(IMM8, 5);
30601 let neg_one: i8 = -1;
30602 let a: f64x4 = a.as_f64x4();
30603 let b: f64x4 = b.as_f64x4();
        let r: i8 = vcmppd256(a, b, IMM8, neg_one);
30605 r.cast_unsigned()
30606 }
30607}
30608
30609/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30610///
30611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_pd_mask&expand=740)
30612#[inline]
30613#[target_feature(enable = "avx512f,avx512vl")]
30614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30615#[rustc_legacy_const_generics(3)]
30616#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30617pub fn _mm256_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d, b: __m256d) -> __mmask8 {
30618 unsafe {
30619 static_assert_uimm_bits!(IMM8, 5);
30620 let a: f64x4 = a.as_f64x4();
30621 let b: f64x4 = b.as_f64x4();
        let r: i8 = vcmppd256(a, b, IMM8, k1 as i8);
30623 r.cast_unsigned()
30624 }
30625}
30626
30627/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30628///
30629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd_mask&expand=737)
30630#[inline]
30631#[target_feature(enable = "avx512f,avx512vl")]
30632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30633#[rustc_legacy_const_generics(2)]
30634#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30635pub fn _mm_cmp_pd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30636 unsafe {
30637 static_assert_uimm_bits!(IMM8, 5);
30638 let neg_one: i8 = -1;
30639 let a: f64x2 = a.as_f64x2();
30640 let b: f64x2 = b.as_f64x2();
        let r: i8 = vcmppd128(a, b, IMM8, neg_one);
30642 r.cast_unsigned()
30643 }
30644}
30645
30646/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30647///
30648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_pd_mask&expand=738)
30649#[inline]
30650#[target_feature(enable = "avx512f,avx512vl")]
30651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30652#[rustc_legacy_const_generics(3)]
30653#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30654pub fn _mm_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
30655 unsafe {
30656 static_assert_uimm_bits!(IMM8, 5);
30657 let a: f64x2 = a.as_f64x2();
30658 let b: f64x2 = b.as_f64x2();
        let r: i8 = vcmppd128(a, b, IMM8, k1 as i8);
30660 r.cast_unsigned()
30661 }
30662}
30663
30664/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
30665/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30666///
30667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_pd_mask&expand=751)
30668#[inline]
30669#[target_feature(enable = "avx512f")]
30670#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30671#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30672#[rustc_legacy_const_generics(2, 3)]
30673pub fn _mm512_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
30674 a: __m512d,
30675 b: __m512d,
30676) -> __mmask8 {
30677 unsafe {
30678 static_assert_uimm_bits!(IMM5, 5);
30679 static_assert_mantissas_sae!(SAE);
30680 let neg_one: i8 = -1;
30681 let a: f64x8 = a.as_f64x8();
30682 let b: f64x8 = b.as_f64x8();
        let r: i8 = vcmppd(a, b, IMM5, neg_one, SAE);
30684 r.cast_unsigned()
30685 }
30686}
30687
30688/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
30689/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30690///
30691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_pd_mask&expand=752)
30692#[inline]
30693#[target_feature(enable = "avx512f")]
30694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30695#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30696#[rustc_legacy_const_generics(3, 4)]
30697pub fn _mm512_mask_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
30698 k1: __mmask8,
30699 a: __m512d,
30700 b: __m512d,
30701) -> __mmask8 {
30702 unsafe {
30703 static_assert_uimm_bits!(IMM5, 5);
30704 static_assert_mantissas_sae!(SAE);
30705 let a: f64x8 = a.as_f64x8();
30706 let b: f64x8 = b.as_f64x8();
        let r: i8 = vcmppd(a, b, IMM5, k1 as i8, SAE);
30708 r.cast_unsigned()
30709 }
30710}
30711
30712/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
30713///
30714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_pd_mask&expand=1159)
30715#[inline]
30716#[target_feature(enable = "avx512f")]
30717#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30718#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30719pub fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30720 _mm512_cmp_pd_mask::<_CMP_ORD_Q>(a, b)
30721}
30722
30723/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30724///
30725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_pd_mask&expand=1160)
30726#[inline]
30727#[target_feature(enable = "avx512f")]
30728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30729#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30730pub fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30731 _mm512_mask_cmp_pd_mask::<_CMP_ORD_Q>(k1, a, b)
30732}
30733
30734/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
30735///
30736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_pd_mask&expand=1167)
30737#[inline]
30738#[target_feature(enable = "avx512f")]
30739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30740#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30741pub fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30742 _mm512_cmp_pd_mask::<_CMP_UNORD_Q>(a, b)
30743}
30744
30745/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30746///
30747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_pd_mask&expand=1168)
30748#[inline]
30749#[target_feature(enable = "avx512f")]
30750#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30751#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30752pub fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30753 _mm512_mask_cmp_pd_mask::<_CMP_UNORD_Q>(k1, a, b)
30754}
30755
30756/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
30757///
30758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss_mask&expand=763)
30759#[inline]
30760#[target_feature(enable = "avx512f")]
30761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30762#[rustc_legacy_const_generics(2)]
30763#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30764pub fn _mm_cmp_ss_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
30765 unsafe {
30766 static_assert_uimm_bits!(IMM8, 5);
30767 let neg_one: i8 = -1;
        let r: i8 = vcmpss(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30769 r.cast_unsigned()
30770 }
30771}
30772
30773/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
30774///
30775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ss_mask&expand=764)
30776#[inline]
30777#[target_feature(enable = "avx512f")]
30778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30779#[rustc_legacy_const_generics(3)]
30780#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30781pub fn _mm_mask_cmp_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
30782 unsafe {
30783 static_assert_uimm_bits!(IMM8, 5);
        let r: i8 = vcmpss(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30785 r.cast_unsigned()
30786 }
30787}
30788
30789/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
30790/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30791///
30792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_ss_mask&expand=757)
30793#[inline]
30794#[target_feature(enable = "avx512f")]
30795#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30796#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30797#[rustc_legacy_const_generics(2, 3)]
30798pub fn _mm_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> __mmask8 {
30799 unsafe {
30800 static_assert_uimm_bits!(IMM5, 5);
30801 static_assert_mantissas_sae!(SAE);
30802 let neg_one: i8 = -1;
        let r: i8 = vcmpss(a, b, IMM5, neg_one, SAE);
30804 r.cast_unsigned()
30805 }
30806}
30807
/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
30809/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30810///
30811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_ss_mask&expand=758)
30812#[inline]
30813#[target_feature(enable = "avx512f")]
30814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30815#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30816#[rustc_legacy_const_generics(3, 4)]
30817pub fn _mm_mask_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(
30818 k1: __mmask8,
30819 a: __m128,
30820 b: __m128,
30821) -> __mmask8 {
30822 unsafe {
30823 static_assert_uimm_bits!(IMM5, 5);
30824 static_assert_mantissas_sae!(SAE);
        let r: i8 = vcmpss(a, b, IMM5, k1 as i8, SAE);
30826 r.cast_unsigned()
30827 }
30828}
30829
30830/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
30831///
30832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd_mask&expand=760)
30833#[inline]
30834#[target_feature(enable = "avx512f")]
30835#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30836#[rustc_legacy_const_generics(2)]
30837#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30838pub fn _mm_cmp_sd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30839 unsafe {
30840 static_assert_uimm_bits!(IMM8, 5);
30841 let neg_one: i8 = -1;
        let r: i8 = vcmpsd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30843 r.cast_unsigned()
30844 }
30845}
30846
30847/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
30848///
30849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_sd_mask&expand=761)
30850#[inline]
30851#[target_feature(enable = "avx512f")]
30852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30853#[rustc_legacy_const_generics(3)]
30854#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30855pub fn _mm_mask_cmp_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
30856 unsafe {
30857 static_assert_uimm_bits!(IMM8, 5);
        let r: i8 = vcmpsd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30859 r.cast_unsigned()
30860 }
30861}
30862
30863/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
30864/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30865///
30866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_sd_mask&expand=755)
30867#[inline]
30868#[target_feature(enable = "avx512f")]
30869#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30870#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30871#[rustc_legacy_const_generics(2, 3)]
30872pub fn _mm_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30873 unsafe {
30874 static_assert_uimm_bits!(IMM5, 5);
30875 static_assert_mantissas_sae!(SAE);
30876 let neg_one: i8 = -1;
        let r: i8 = vcmpsd(a, b, IMM5, neg_one, SAE);
30878 r.cast_unsigned()
30879 }
30880}
30881
30882/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
30883/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30884///
30885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_sd_mask&expand=756)
30886#[inline]
30887#[target_feature(enable = "avx512f")]
30888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30889#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30890#[rustc_legacy_const_generics(3, 4)]
30891pub fn _mm_mask_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(
30892 k1: __mmask8,
30893 a: __m128d,
30894 b: __m128d,
30895) -> __mmask8 {
30896 unsafe {
30897 static_assert_uimm_bits!(IMM5, 5);
30898 static_assert_mantissas_sae!(SAE);
        let r: i8 = vcmpsd(a, b, IMM5, k1 as i8, SAE);
30900 r.cast_unsigned()
30901 }
30902}
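
// A minimal illustrative sketch, not part of the upstream source: it shows that the
// scalar lower-element compares above only ever report through bit 0 of the result
// mask, assuming an AVX-512F (and SSE) target. The helper name
// `sketch_scalar_cmp_mask` is invented here.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,sse")]
fn sketch_scalar_cmp_mask() {
    let a = _mm_set_ss(1.0);
    let b = _mm_set_ss(2.0);
    // Only the lowest element is compared: 1.0 < 2.0, so bit 0 is set.
    assert_eq!(_mm_cmp_ss_mask::<_CMP_LT_OS>(a, b), 0b1);
    // With a zeromask whose bit 0 is clear, the result is forced to 0.
    assert_eq!(_mm_mask_cmp_ss_mask::<_CMP_LT_OS>(0b0, a, b), 0);
}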
30903
30904/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30905///
30906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu32_mask&expand=1056)
30907#[inline]
30908#[target_feature(enable = "avx512f")]
30909#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30910#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30911pub fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16())) }
30913}
30914
30915/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30916///
30917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu32_mask&expand=1057)
30918#[inline]
30919#[target_feature(enable = "avx512f")]
30920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30921#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30922pub fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30923 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30924}
30925
30926/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30927///
30928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu32_mask&expand=1054)
30929#[inline]
30930#[target_feature(enable = "avx512f,avx512vl")]
30931#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30932#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30933pub fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_lt(a.as_u32x8(), b.as_u32x8())) }
30935}
30936
30937/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30938///
30939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu32_mask&expand=1055)
30940#[inline]
30941#[target_feature(enable = "avx512f,avx512vl")]
30942#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30943#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30944pub fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30945 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30946}
30947
30948/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30949///
30950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu32_mask&expand=1052)
30951#[inline]
30952#[target_feature(enable = "avx512f,avx512vl")]
30953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30954#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30955pub fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_lt(a.as_u32x4(), b.as_u32x4())) }
30957}
30958
30959/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30960///
30961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu32_mask&expand=1053)
30962#[inline]
30963#[target_feature(enable = "avx512f,avx512vl")]
30964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30965#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30966pub fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30967 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30968}
30969
30970/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30971///
30972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu32_mask&expand=933)
30973#[inline]
30974#[target_feature(enable = "avx512f")]
30975#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30976#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30977pub fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16())) }
30979}
30980
30981/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30982///
30983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu32_mask&expand=934)
30984#[inline]
30985#[target_feature(enable = "avx512f")]
30986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30987#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30988pub fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30989 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
30990}
30991
30992/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30993///
30994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu32_mask&expand=931)
30995#[inline]
30996#[target_feature(enable = "avx512f,avx512vl")]
30997#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30998#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30999pub fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_gt(a.as_u32x8(), b.as_u32x8())) }
31001}
31002
31003/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31004///
31005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu32_mask&expand=932)
31006#[inline]
31007#[target_feature(enable = "avx512f,avx512vl")]
31008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31009#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31010pub fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31011 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31012}
31013
31014/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31015///
31016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu32_mask&expand=929)
31017#[inline]
31018#[target_feature(enable = "avx512f,avx512vl")]
31019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31020#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31021pub fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_gt(a.as_u32x4(), b.as_u32x4())) }
31023}
31024
31025/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31026///
31027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu32_mask&expand=930)
31028#[inline]
31029#[target_feature(enable = "avx512f,avx512vl")]
31030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31031#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31032pub fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31033 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31034}
31035
31036/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31037///
31038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu32_mask&expand=995)
31039#[inline]
31040#[target_feature(enable = "avx512f")]
31041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31042#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31043pub fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16())) }
31045}
31046
31047/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31048///
31049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu32_mask&expand=996)
31050#[inline]
31051#[target_feature(enable = "avx512f")]
31052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31053#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31054pub fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31055 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31056}
31057
31058/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31059///
31060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu32_mask&expand=993)
31061#[inline]
31062#[target_feature(enable = "avx512f,avx512vl")]
31063#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31064#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31065pub fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_le(a.as_u32x8(), b.as_u32x8())) }
31067}
31068
31069/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31070///
31071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu32_mask&expand=994)
31072#[inline]
31073#[target_feature(enable = "avx512f,avx512vl")]
31074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31075#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31076pub fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31077 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31078}
31079
31080/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31081///
31082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu32_mask&expand=991)
31083#[inline]
31084#[target_feature(enable = "avx512f,avx512vl")]
31085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31086#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31087pub fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_le(a.as_u32x4(), b.as_u32x4())) }
31089}
31090
31091/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31092///
31093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu32_mask&expand=992)
31094#[inline]
31095#[target_feature(enable = "avx512f,avx512vl")]
31096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31097#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31098pub fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31099 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31100}
31101
31102/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31103///
31104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu32_mask&expand=873)
31105#[inline]
31106#[target_feature(enable = "avx512f")]
31107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31108#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31109pub fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16())) }
31111}
31112
31113/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31114///
31115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu32_mask&expand=874)
31116#[inline]
31117#[target_feature(enable = "avx512f")]
31118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31119#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31120pub fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31121 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31122}
31123
31124/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31125///
31126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu32_mask&expand=871)
31127#[inline]
31128#[target_feature(enable = "avx512f,avx512vl")]
31129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31130#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31131pub fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_ge(a.as_u32x8(), b.as_u32x8())) }
31133}
31134
31135/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31136///
31137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu32_mask&expand=872)
31138#[inline]
31139#[target_feature(enable = "avx512f,avx512vl")]
31140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31141#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31142pub fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31143 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31144}
31145
31146/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31147///
31148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu32_mask&expand=869)
31149#[inline]
31150#[target_feature(enable = "avx512f,avx512vl")]
31151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31152#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31153pub fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_ge(a.as_u32x4(), b.as_u32x4())) }
31155}
31156
31157/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31158///
31159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu32_mask&expand=870)
31160#[inline]
31161#[target_feature(enable = "avx512f,avx512vl")]
31162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31163#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31164pub fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31165 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31166}
31167
31168/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31169///
31170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu32_mask&expand=807)
31171#[inline]
31172#[target_feature(enable = "avx512f")]
31173#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31174#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31175pub fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16())) }
31177}
31178
31179/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31180///
31181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu32_mask&expand=808)
31182#[inline]
31183#[target_feature(enable = "avx512f")]
31184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31185#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31186pub fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31187 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31188}
31189
31190/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31191///
31192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu32_mask&expand=805)
31193#[inline]
31194#[target_feature(enable = "avx512f,avx512vl")]
31195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31196#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31197pub fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_eq(a.as_u32x8(), b.as_u32x8())) }
31199}
31200
31201/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31202///
31203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu32_mask&expand=806)
31204#[inline]
31205#[target_feature(enable = "avx512f,avx512vl")]
31206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31207#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31208pub fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31209 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31210}
31211
31212/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31213///
31214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu32_mask&expand=803)
31215#[inline]
31216#[target_feature(enable = "avx512f,avx512vl")]
31217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31218#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31219pub fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_eq(a.as_u32x4(), b.as_u32x4())) }
31221}
31222
31223/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31224///
31225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu32_mask&expand=804)
31226#[inline]
31227#[target_feature(enable = "avx512f,avx512vl")]
31228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31229#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31230pub fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31231 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31232}
31233
31234/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31235///
31236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu32_mask&expand=1112)
31237#[inline]
31238#[target_feature(enable = "avx512f")]
31239#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31240#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31241pub fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16())) }
31243}
31244
31245/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31246///
31247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu32_mask&expand=1113)
31248#[inline]
31249#[target_feature(enable = "avx512f")]
31250#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31251#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31252pub fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31253 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31254}
31255
31256/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31257///
31258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu32_mask&expand=1110)
31259#[inline]
31260#[target_feature(enable = "avx512f,avx512vl")]
31261#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31262#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31263pub fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_ne(a.as_u32x8(), b.as_u32x8())) }
31265}
31266
31267/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31268///
31269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu32_mask&expand=1111)
31270#[inline]
31271#[target_feature(enable = "avx512f,avx512vl")]
31272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31273#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31274pub fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31275 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31276}
31277
31278/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31279///
31280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu32_mask&expand=1108)
31281#[inline]
31282#[target_feature(enable = "avx512f,avx512vl")]
31283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31284#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31285pub fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_ne(a.as_u32x4(), b.as_u32x4())) }
31287}
31288
31289/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31290///
31291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu32_mask&expand=1109)
31292#[inline]
31293#[target_feature(enable = "avx512f,avx512vl")]
31294#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31295#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31296pub fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31297 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31298}
31299
31300/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31301///
31302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu32_mask&expand=721)
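///
/// A minimal sketch showing how the predicate is selected through the const generic
/// (illustrative values; gated on run-time AVX-512F detection):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if std::arch::is_x86_feature_detected!("avx512f") {
///     let k = unsafe {
///         // _MM_CMPINT_LE selects the unsigned less-than-or-equal predicate,
///         // which holds for every lane when both operands are equal.
///         let a = _mm512_set1_epi32(5);
///         let b = _mm512_set1_epi32(5);
///         _mm512_cmp_epu32_mask::<_MM_CMPINT_LE>(a, b)
///     };
///     assert_eq!(k, 0xFFFF);
/// }
/// # }
/// ```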
31303#[inline]
31304#[target_feature(enable = "avx512f")]
31305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31306#[rustc_legacy_const_generics(2)]
31307#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31308pub fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
31309 unsafe {
31310 static_assert_uimm_bits!(IMM3, 3);
31311 let a: u32x16 = a.as_u32x16();
31312 let b: u32x16 = b.as_u32x16();
        let r: i32x16 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x16::splat(-1),
        };
31323 simd_bitmask(r)
31324 }
31325}
31326
31327/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31328///
31329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu32_mask&expand=722)
31330#[inline]
31331#[target_feature(enable = "avx512f")]
31332#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31333#[rustc_legacy_const_generics(3)]
31334#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31335pub fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31336 k1: __mmask16,
31337 a: __m512i,
31338 b: __m512i,
31339) -> __mmask16 {
31340 unsafe {
31341 static_assert_uimm_bits!(IMM3, 3);
31342 let a: u32x16 = a.as_u32x16();
31343 let b: u32x16 = b.as_u32x16();
        let k1: i32x16 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
        let r: i32x16 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
31355 simd_bitmask(r)
31356 }
31357}
31358
31359/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31360///
31361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu32_mask&expand=719)
31362#[inline]
31363#[target_feature(enable = "avx512f,avx512vl")]
31364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31365#[rustc_legacy_const_generics(2)]
31366#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31367pub fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
31368 unsafe {
31369 static_assert_uimm_bits!(IMM3, 3);
31370 let a: u32x8 = a.as_u32x8();
31371 let b: u32x8 = b.as_u32x8();
        let r: i32x8 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x8::splat(-1),
        };
31382 simd_bitmask(r)
31383 }
31384}
31385
31386/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31387///
31388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu32_mask&expand=720)
31389#[inline]
31390#[target_feature(enable = "avx512f,avx512vl")]
31391#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31392#[rustc_legacy_const_generics(3)]
31393#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31394pub fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31395 k1: __mmask8,
31396 a: __m256i,
31397 b: __m256i,
31398) -> __mmask8 {
31399 unsafe {
31400 static_assert_uimm_bits!(IMM3, 3);
31401 let a: u32x8 = a.as_u32x8();
31402 let b: u32x8 = b.as_u32x8();
        let k1: i32x8 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
        let r: i32x8 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
31414 simd_bitmask(r)
31415 }
31416}
31417
31418/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31419///
31420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu32_mask&expand=717)
31421#[inline]
31422#[target_feature(enable = "avx512f,avx512vl")]
31423#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31424#[rustc_legacy_const_generics(2)]
31425#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31426pub fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
31427 unsafe {
31428 static_assert_uimm_bits!(IMM3, 3);
31429 let a: u32x4 = a.as_u32x4();
31430 let b: u32x4 = b.as_u32x4();
        let r: i32x4 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x4::splat(-1),
        };
31441 simd_bitmask(r)
31442 }
31443}
31444
31445/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31446///
31447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu32_mask&expand=718)
31448#[inline]
31449#[target_feature(enable = "avx512f,avx512vl")]
31450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31451#[rustc_legacy_const_generics(3)]
31452#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31453pub fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31454 k1: __mmask8,
31455 a: __m128i,
31456 b: __m128i,
31457) -> __mmask8 {
31458 unsafe {
31459 static_assert_uimm_bits!(IMM3, 3);
31460 let a: u32x4 = a.as_u32x4();
31461 let b: u32x4 = b.as_u32x4();
        let k1: i32x4 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
        let r: i32x4 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
31473 simd_bitmask(r)
31474 }
31475}
31476
31477/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31478///
31479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi32_mask&expand=1029)
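///
/// A minimal sketch contrasting the signed interpretation with the `epu32` variants above
/// (illustrative values; gated on run-time AVX-512F detection):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if std::arch::is_x86_feature_detected!("avx512f") {
///     let k = unsafe {
///         // As signed integers -1 < 0 in every lane; the unsigned variant
///         // would treat -1 as u32::MAX and return an empty mask instead.
///         let a = _mm512_set1_epi32(-1);
///         let b = _mm512_set1_epi32(0);
///         _mm512_cmplt_epi32_mask(a, b)
///     };
///     assert_eq!(k, 0xFFFF);
/// }
/// # }
/// ```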
31480#[inline]
31481#[target_feature(enable = "avx512f")]
31482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31483#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31484pub fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) }
31486}
31487
31488/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31489///
31490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
31491#[inline]
31492#[target_feature(enable = "avx512f")]
31493#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31494#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31495pub fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31496 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31497}
31498
31499/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31500///
31501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi32_mask&expand=1027)
31502#[inline]
31503#[target_feature(enable = "avx512f,avx512vl")]
31504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31505#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31506pub fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8())) }
31508}
31509
31510/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31511///
31512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi32_mask&expand=1028)
31513#[inline]
31514#[target_feature(enable = "avx512f,avx512vl")]
31515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31516#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31517pub fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31518 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31519}
31520
31521/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31522///
31523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32_mask&expand=1025)
31524#[inline]
31525#[target_feature(enable = "avx512f,avx512vl")]
31526#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31527#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31528pub fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
31530}
31531
31532/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31533///
31534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi32_mask&expand=1026)
31535#[inline]
31536#[target_feature(enable = "avx512f,avx512vl")]
31537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31538#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31539pub fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31540 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31541}
31542
31543/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31544///
31545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi32_mask&expand=905)
31546#[inline]
31547#[target_feature(enable = "avx512f")]
31548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31549#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31550pub fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16())) }
31552}
31553
31554/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31555///
31556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi32_mask&expand=906)
31557#[inline]
31558#[target_feature(enable = "avx512f")]
31559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31560#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31561pub fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31562 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31563}
31564
31565/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31566///
31567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32_mask&expand=903)
31568#[inline]
31569#[target_feature(enable = "avx512f,avx512vl")]
31570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31571#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31572pub fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
31574}
31575
31576/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31577///
31578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi32_mask&expand=904)
31579#[inline]
31580#[target_feature(enable = "avx512f,avx512vl")]
31581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31582#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31583pub fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31584 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31585}
31586
31587/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31588///
31589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32_mask&expand=901)
31590#[inline]
31591#[target_feature(enable = "avx512f,avx512vl")]
31592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31593#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31594pub fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
31596}
31597
31598/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31599///
31600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi32_mask&expand=902)
31601#[inline]
31602#[target_feature(enable = "avx512f,avx512vl")]
31603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31604#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31605pub fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31606 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31607}
31608
31609/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31610///
31611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi32_mask&expand=971)
31612#[inline]
31613#[target_feature(enable = "avx512f")]
31614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31615#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31616pub fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16())) }
31618}
31619
31620/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31621///
31622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi32_mask&expand=972)
31623#[inline]
31624#[target_feature(enable = "avx512f")]
31625#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31626#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31627pub fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31628 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31629}
31630
31631/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31632///
31633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi32_mask&expand=969)
31634#[inline]
31635#[target_feature(enable = "avx512f,avx512vl")]
31636#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31637#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31638pub fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_le(a.as_i32x8(), b.as_i32x8())) }
31640}
31641
31642/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31643///
31644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi32_mask&expand=970)
31645#[inline]
31646#[target_feature(enable = "avx512f,avx512vl")]
31647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31648#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31649pub fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31650 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31651}
31652
31653/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31654///
31655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi32_mask&expand=967)
31656#[inline]
31657#[target_feature(enable = "avx512f,avx512vl")]
31658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31659#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31660pub fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_le(a.as_i32x4(), b.as_i32x4())) }
31662}
31663
31664/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31665///
31666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi32_mask&expand=968)
31667#[inline]
31668#[target_feature(enable = "avx512f,avx512vl")]
31669#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31670#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31671pub fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31672 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31673}
31674
31675/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31676///
31677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi32_mask&expand=849)
31678#[inline]
31679#[target_feature(enable = "avx512f")]
31680#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31681#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31682pub fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16())) }
31684}
31685
31686/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31687///
31688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi32_mask&expand=850)
31689#[inline]
31690#[target_feature(enable = "avx512f")]
31691#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31692#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31693pub fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31694 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31695}
31696
31697/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31698///
31699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi32_mask&expand=847)
31700#[inline]
31701#[target_feature(enable = "avx512f,avx512vl")]
31702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31703#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31704pub fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_ge(a.as_i32x8(), b.as_i32x8())) }
31706}
31707
31708/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31709///
31710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi32_mask&expand=848)
31711#[inline]
31712#[target_feature(enable = "avx512f,avx512vl")]
31713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31714#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31715pub fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31716 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31717}
31718
31719/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31720///
31721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi32_mask&expand=845)
31722#[inline]
31723#[target_feature(enable = "avx512f,avx512vl")]
31724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31725#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31726pub fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_ge(a.as_i32x4(), b.as_i32x4())) }
31728}
31729
31730/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31731///
31732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi32_mask&expand=846)
31733#[inline]
31734#[target_feature(enable = "avx512f,avx512vl")]
31735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31736#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31737pub fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31738 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31739}
31740
31741/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31742///
31743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi32_mask&expand=779)
31744#[inline]
31745#[target_feature(enable = "avx512f")]
31746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31747#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31748pub fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16())) }
31750}
31751
31752/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31753///
31754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi32_mask&expand=780)
31755#[inline]
31756#[target_feature(enable = "avx512f")]
31757#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31758#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31759pub fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31760 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31761}
31762
31763/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31764///
31765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32_mask&expand=777)
31766#[inline]
31767#[target_feature(enable = "avx512f,avx512vl")]
31768#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31769#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31770pub fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
31772}
31773
31774/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31775///
31776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi32_mask&expand=778)
31777#[inline]
31778#[target_feature(enable = "avx512f,avx512vl")]
31779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31780#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31781pub fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31782 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31783}
31784
31785/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31786///
31787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32_mask&expand=775)
31788#[inline]
31789#[target_feature(enable = "avx512f,avx512vl")]
31790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31791#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31792pub fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
31794}
31795
31796/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31797///
31798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi32_mask&expand=776)
31799#[inline]
31800#[target_feature(enable = "avx512f,avx512vl")]
31801#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31802#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31803pub fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31804 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31805}
31806
31807/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31808///
31809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi32_mask&expand=1088)
31810#[inline]
31811#[target_feature(enable = "avx512f")]
31812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31813#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31814pub fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16())) }
31816}
31817
31818/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31819///
31820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi32_mask&expand=1089)
31821#[inline]
31822#[target_feature(enable = "avx512f")]
31823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31824#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31825pub fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31826 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31827}
31828
31829/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31830///
31831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi32_mask&expand=1086)
31832#[inline]
31833#[target_feature(enable = "avx512f,avx512vl")]
31834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31835#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31836pub fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_ne(a.as_i32x8(), b.as_i32x8())) }
31838}
31839
31840/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31841///
31842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi32_mask&expand=1087)
31843#[inline]
31844#[target_feature(enable = "avx512f,avx512vl")]
31845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31846#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31847pub fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31848 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31849}
31850
31851/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31852///
31853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi32_mask&expand=1084)
31854#[inline]
31855#[target_feature(enable = "avx512f,avx512vl")]
31856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31857#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31858pub fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_ne(a.as_i32x4(), b.as_i32x4())) }
31860}
31861
31862/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31863///
31864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi32_mask&expand=1085)
31865#[inline]
31866#[target_feature(enable = "avx512f,avx512vl")]
31867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31868#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31869pub fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31870 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31871}
31872
31873/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31874///
31875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi32_mask&expand=697)
31876#[inline]
31877#[target_feature(enable = "avx512f")]
31878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31879#[rustc_legacy_const_generics(2)]
31880#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31881pub fn _mm512_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
31882 unsafe {
31883 static_assert_uimm_bits!(IMM3, 3);
31884 let a: i32x16 = a.as_i32x16();
31885 let b: i32x16 = b.as_i32x16();
        let r: i32x16 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x16::splat(-1),
        };
31896 simd_bitmask(r)
31897 }
31898}
31899
31900/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31901///
31902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi32_mask&expand=698)
31903#[inline]
31904#[target_feature(enable = "avx512f")]
31905#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31906#[rustc_legacy_const_generics(3)]
31907#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31908pub fn _mm512_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
31909 k1: __mmask16,
31910 a: __m512i,
31911 b: __m512i,
31912) -> __mmask16 {
31913 unsafe {
31914 static_assert_uimm_bits!(IMM3, 3);
31915 let a: i32x16 = a.as_i32x16();
31916 let b: i32x16 = b.as_i32x16();
        let k1: i32x16 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
        let r: i32x16 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
31928 simd_bitmask(r)
31929 }
31930}
31931
31932/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31933///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi32_mask&expand=695)
31935#[inline]
31936#[target_feature(enable = "avx512f,avx512vl")]
31937#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31938#[rustc_legacy_const_generics(2)]
31939#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31940pub fn _mm256_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
31941 unsafe {
31942 static_assert_uimm_bits!(IMM3, 3);
31943 let a: i32x8 = a.as_i32x8();
31944 let b: i32x8 = b.as_i32x8();
        let r: i32x8 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x8::splat(-1),
        };
31955 simd_bitmask(r)
31956 }
31957}
31958
31959/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31960///
31961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi32_mask&expand=696)
31962#[inline]
31963#[target_feature(enable = "avx512f,avx512vl")]
31964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31965#[rustc_legacy_const_generics(3)]
31966#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31967pub fn _mm256_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
31968 k1: __mmask8,
31969 a: __m256i,
31970 b: __m256i,
31971) -> __mmask8 {
31972 unsafe {
31973 static_assert_uimm_bits!(IMM3, 3);
31974 let a: i32x8 = a.as_i32x8();
31975 let b: i32x8 = b.as_i32x8();
        let k1: i32x8 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
        let r: i32x8 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
31987 simd_bitmask(r)
31988 }
31989}
31990
31991/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31992///
31993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi32_mask&expand=693)
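///
/// A minimal sketch for the 128-bit form (illustrative values; gated on run-time detection of
/// both AVX-512F and AVX-512VL):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if std::arch::is_x86_feature_detected!("avx512f")
///     && std::arch::is_x86_feature_detected!("avx512vl")
/// {
///     let k = unsafe {
///         // Lanes 0 and 2 are equal, so bits 0 and 2 of the mask are set; only
///         // the low four bits of the returned __mmask8 are meaningful here.
///         let a = _mm_setr_epi32(0, 1, 2, 3);
///         let b = _mm_setr_epi32(0, 9, 2, 9);
///         _mm_cmp_epi32_mask::<_MM_CMPINT_EQ>(a, b)
///     };
///     assert_eq!(k, 0b0101);
/// }
/// # }
/// ```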
31994#[inline]
31995#[target_feature(enable = "avx512f,avx512vl")]
31996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31997#[rustc_legacy_const_generics(2)]
31998#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31999pub fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
32000 unsafe {
32001 static_assert_uimm_bits!(IMM3, 3);
32002 let a: i32x4 = a.as_i32x4();
32003 let b: i32x4 = b.as_i32x4();
        let r: i32x4 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x4::splat(-1),
        };
32014 simd_bitmask(r)
32015 }
32016}
32017
32018/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32019///
32020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi32_mask&expand=694)
32021#[inline]
32022#[target_feature(enable = "avx512f,avx512vl")]
32023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32024#[rustc_legacy_const_generics(3)]
32025#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32026pub fn _mm_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
32027 k1: __mmask8,
32028 a: __m128i,
32029 b: __m128i,
32030) -> __mmask8 {
32031 unsafe {
32032 static_assert_uimm_bits!(IMM3, 3);
32033 let a: i32x4 = a.as_i32x4();
32034 let b: i32x4 = b.as_i32x4();
        let k1: i32x4 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
        let r: i32x4 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
32046 simd_bitmask(r)
32047 }
32048}
32049
32050/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32051///
32052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu64_mask&expand=1062)
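///
/// A minimal sketch for the 64-bit lanes (illustrative values; gated on run-time AVX-512F
/// detection):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if std::arch::is_x86_feature_detected!("avx512f") {
///     let k = unsafe {
///         // Eight 64-bit lanes, so the comparison fills all eight bits of the mask.
///         let a = _mm512_set1_epi64(1);
///         let b = _mm512_set1_epi64(2);
///         _mm512_cmplt_epu64_mask(a, b)
///     };
///     assert_eq!(k, 0xFF);
/// }
/// # }
/// ```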
32053#[inline]
32054#[target_feature(enable = "avx512f")]
32055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32056#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32057pub fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) }
32059}
32060
32061/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32062///
32063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu64_mask&expand=1063)
32064#[inline]
32065#[target_feature(enable = "avx512f")]
32066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32067#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32068pub fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32069 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32070}
32071
32072/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32073///
32074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu64_mask&expand=1060)
32075#[inline]
32076#[target_feature(enable = "avx512f,avx512vl")]
32077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32078#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32079pub fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_u64x4(), b.as_u64x4())) }
32081}
32082
32083/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32084///
32085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu64_mask&expand=1061)
32086#[inline]
32087#[target_feature(enable = "avx512f,avx512vl")]
32088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32089#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32090pub fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32091 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32092}
32093
32094/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32095///
32096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu64_mask&expand=1058)
32097#[inline]
32098#[target_feature(enable = "avx512f,avx512vl")]
32099#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32100#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32101pub fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_u64x2(), b.as_u64x2())) }
32103}
32104
32105/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32106///
32107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu64_mask&expand=1059)
32108#[inline]
32109#[target_feature(enable = "avx512f,avx512vl")]
32110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32111#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32112pub fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32113 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32114}
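
// Illustrative sketch only, not part of the stdarch API: the `epu64`
// comparisons above are unsigned, so a lane of all ones compares as u64::MAX
// rather than as -1. The helper name `unsigned_vs_signed_lt_demo` is an
// assumption made for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn unsigned_vs_signed_lt_demo() -> (__mmask8, __mmask8) {
    let a = _mm512_set1_epi64(-1); // u64::MAX in every lane when read unsigned
    let b = _mm512_set1_epi64(0);
    // Unsigned view: u64::MAX < 0 is false in every lane, so the mask is 0x00.
    let unsigned = _mm512_cmplt_epu64_mask(a, b);
    // Signed view: -1 < 0 is true in every lane, so the mask is 0xff.
    let signed = _mm512_cmplt_epi64_mask(a, b);
    (unsigned, signed)
}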
32115
32116/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32117///
32118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu64_mask&expand=939)
32119#[inline]
32120#[target_feature(enable = "avx512f")]
32121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32122#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32123pub fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) }
32125}
32126
32127/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32128///
32129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu64_mask&expand=940)
32130#[inline]
32131#[target_feature(enable = "avx512f")]
32132#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32133#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32134pub fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32135 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32136}
32137
32138/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32139///
32140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu64_mask&expand=937)
32141#[inline]
32142#[target_feature(enable = "avx512f,avx512vl")]
32143#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32144#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32145pub fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_u64x4(), b.as_u64x4())) }
32147}
32148
32149/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32150///
32151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu64_mask&expand=938)
32152#[inline]
32153#[target_feature(enable = "avx512f,avx512vl")]
32154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32155#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32156pub fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32157 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32158}
32159
32160/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32161///
32162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu64_mask&expand=935)
32163#[inline]
32164#[target_feature(enable = "avx512f,avx512vl")]
32165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32166#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32167pub fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_u64x2(), b.as_u64x2())) }
32169}
32170
32171/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32172///
32173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu64_mask&expand=936)
32174#[inline]
32175#[target_feature(enable = "avx512f,avx512vl")]
32176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32177#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32178pub fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32179 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32180}
32181
32182/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32183///
32184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu64_mask&expand=1001)
32185#[inline]
32186#[target_feature(enable = "avx512f")]
32187#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32188#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32189pub fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) }
32191}
32192
32193/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32194///
32195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu64_mask&expand=1002)
32196#[inline]
32197#[target_feature(enable = "avx512f")]
32198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32199#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32200pub fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32201 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32202}
32203
32204/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32205///
32206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu64_mask&expand=999)
32207#[inline]
32208#[target_feature(enable = "avx512f,avx512vl")]
32209#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32210#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32211pub fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_u64x4(), b.as_u64x4())) }
32213}
32214
32215/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32216///
32217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu64_mask&expand=1000)
32218#[inline]
32219#[target_feature(enable = "avx512f,avx512vl")]
32220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32221#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32222pub fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32223 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32224}
32225
32226/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32227///
32228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu64_mask&expand=997)
32229#[inline]
32230#[target_feature(enable = "avx512f,avx512vl")]
32231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32232#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32233pub fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_u64x2(), b.as_u64x2())) }
32235}
32236
32237/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32238///
32239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu64_mask&expand=998)
32240#[inline]
32241#[target_feature(enable = "avx512f,avx512vl")]
32242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32243#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32244pub fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32245 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32246}
32247
32248/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32249///
32250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu64_mask&expand=879)
32251#[inline]
32252#[target_feature(enable = "avx512f")]
32253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32254#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32255pub fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8())) }
32257}
32258
32259/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32260///
32261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu64_mask&expand=880)
32262#[inline]
32263#[target_feature(enable = "avx512f")]
32264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32265#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32266pub fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32267 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32268}
32269
32270/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32271///
32272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu64_mask&expand=877)
32273#[inline]
32274#[target_feature(enable = "avx512f,avx512vl")]
32275#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32276#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32277pub fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_u64x4(), b.as_u64x4())) }
32279}
32280
32281/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32282///
32283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu64_mask&expand=878)
32284#[inline]
32285#[target_feature(enable = "avx512f,avx512vl")]
32286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32287#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32288pub fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32289 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32290}
32291
32292/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32293///
32294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu64_mask&expand=875)
32295#[inline]
32296#[target_feature(enable = "avx512f,avx512vl")]
32297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32298#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32299pub fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_u64x2(), b.as_u64x2())) }
32301}
32302
32303/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32304///
32305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu64_mask&expand=876)
32306#[inline]
32307#[target_feature(enable = "avx512f,avx512vl")]
32308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32309#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32310pub fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32311 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32312}
32313
32314/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32315///
32316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu64_mask&expand=813)
32317#[inline]
32318#[target_feature(enable = "avx512f")]
32319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32320#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32321pub fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8())) }
32323}
32324
32325/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32326///
32327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu64_mask&expand=814)
32328#[inline]
32329#[target_feature(enable = "avx512f")]
32330#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32331#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32332pub fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32333 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32334}
32335
32336/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32337///
32338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu64_mask&expand=811)
32339#[inline]
32340#[target_feature(enable = "avx512f,avx512vl")]
32341#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32342#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32343pub fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_u64x4(), b.as_u64x4())) }
32345}
32346
32347/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32348///
32349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu64_mask&expand=812)
32350#[inline]
32351#[target_feature(enable = "avx512f,avx512vl")]
32352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32353#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32354pub fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32355 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32356}
32357
32358/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32359///
32360/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu64_mask&expand=809)
32361#[inline]
32362#[target_feature(enable = "avx512f,avx512vl")]
32363#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32364#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32365pub fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_u64x2(), b.as_u64x2())) }
32367}
32368
32369/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32370///
32371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu64_mask&expand=810)
32372#[inline]
32373#[target_feature(enable = "avx512f,avx512vl")]
32374#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32375#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32376pub fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32377 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32378}
32379
32380/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32381///
32382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu64_mask&expand=1118)
32383#[inline]
32384#[target_feature(enable = "avx512f")]
32385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32386#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32387pub fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8())) }
32389}
32390
32391/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32392///
32393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu64_mask&expand=1119)
32394#[inline]
32395#[target_feature(enable = "avx512f")]
32396#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32397#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32398pub fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32399 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32400}
32401
32402/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32403///
32404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu64_mask&expand=1116)
32405#[inline]
32406#[target_feature(enable = "avx512f,avx512vl")]
32407#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32408#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32409pub fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_u64x4(), b.as_u64x4())) }
32411}
32412
32413/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32414///
32415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu64_mask&expand=1117)
32416#[inline]
32417#[target_feature(enable = "avx512f,avx512vl")]
32418#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32419#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32420pub fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32421 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32422}
32423
32424/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32425///
32426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu64_mask&expand=1114)
32427#[inline]
32428#[target_feature(enable = "avx512f,avx512vl")]
32429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32430#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32431pub fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_u64x2(), b.as_u64x2())) }
32433}
32434
32435/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32436///
32437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu64_mask&expand=1115)
32438#[inline]
32439#[target_feature(enable = "avx512f,avx512vl")]
32440#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32441#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32442pub fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32443 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32444}
32445
32446/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32447///
32448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu64_mask&expand=727)
32449#[inline]
32450#[target_feature(enable = "avx512f")]
32451#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32452#[rustc_legacy_const_generics(2)]
32453#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32454pub fn _mm512_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
32455 unsafe {
32456 static_assert_uimm_bits!(IMM3, 3);
32457 let a: u64x8 = a.as_u64x8();
32458 let b: u64x8 = b.as_u64x8();
32459 let r: i64x8 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
32467 _ => i64x8::splat(-1),
32468 };
32469 simd_bitmask(r)
32470 }
32471}
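
// Illustrative sketch only, not part of the stdarch API: the fixed-predicate
// helpers such as `_mm512_cmplt_epu64_mask` are equivalent to this generic
// compare instantiated with the matching `_MM_CMPINT_*` constant. The helper
// name `cmp_predicate_demo` is an assumption made for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cmp_predicate_demo(a: __m512i, b: __m512i) -> bool {
    let via_wrapper = _mm512_cmplt_epu64_mask(a, b);
    let via_generic = _mm512_cmp_epu64_mask::<_MM_CMPINT_LT>(a, b);
    via_wrapper == via_generic
}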
32472
32473/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32474///
32475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu64_mask&expand=728)
32476#[inline]
32477#[target_feature(enable = "avx512f")]
32478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32479#[rustc_legacy_const_generics(3)]
32480#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32481pub fn _mm512_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32482 k1: __mmask8,
32483 a: __m512i,
32484 b: __m512i,
32485) -> __mmask8 {
32486 unsafe {
32487 static_assert_uimm_bits!(IMM3, 3);
32488 let a: u64x8 = a.as_u64x8();
32489 let b: u64x8 = b.as_u64x8();
        let k1: i64x8 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
        let r: i64x8 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
32499 _ => k1,
32500 };
32501 simd_bitmask(r)
32502 }
32503}
32504
32505/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32506///
32507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu64_mask&expand=725)
32508#[inline]
32509#[target_feature(enable = "avx512f,avx512vl")]
32510#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32511#[rustc_legacy_const_generics(2)]
32512#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32513pub fn _mm256_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
32514 unsafe {
32515 static_assert_uimm_bits!(IMM3, 3);
32516 let a: u64x4 = a.as_u64x4();
32517 let b: u64x4 = b.as_u64x4();
32518 let r: i64x4 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
32526 _ => i64x4::splat(-1),
32527 };
32528 simd_bitmask(r)
32529 }
32530}
32531
32532/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32533///
32534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu64_mask&expand=726)
32535#[inline]
32536#[target_feature(enable = "avx512f,avx512vl")]
32537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32538#[rustc_legacy_const_generics(3)]
32539#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32540pub fn _mm256_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32541 k1: __mmask8,
32542 a: __m256i,
32543 b: __m256i,
32544) -> __mmask8 {
32545 unsafe {
32546 static_assert_uimm_bits!(IMM3, 3);
32547 let a: u64x4 = a.as_u64x4();
32548 let b: u64x4 = b.as_u64x4();
        let k1: i64x4 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
        let r: i64x4 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
32558 _ => k1,
32559 };
32560 simd_bitmask(r)
32561 }
32562}
32563
32564/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32565///
32566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu64_mask&expand=723)
32567#[inline]
32568#[target_feature(enable = "avx512f,avx512vl")]
32569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32570#[rustc_legacy_const_generics(2)]
32571#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32572pub fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
32573 unsafe {
32574 static_assert_uimm_bits!(IMM3, 3);
32575 let a: u64x2 = a.as_u64x2();
32576 let b: u64x2 = b.as_u64x2();
32577 let r: i64x2 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x2::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
32585 _ => i64x2::splat(-1),
32586 };
32587 simd_bitmask(r)
32588 }
32589}
32590
32591/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32592///
32593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu64_mask&expand=724)
32594#[inline]
32595#[target_feature(enable = "avx512f,avx512vl")]
32596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32597#[rustc_legacy_const_generics(3)]
32598#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32599pub fn _mm_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32600 k1: __mmask8,
32601 a: __m128i,
32602 b: __m128i,
32603) -> __mmask8 {
32604 unsafe {
32605 static_assert_uimm_bits!(IMM3, 3);
32606 let a: u64x2 = a.as_u64x2();
32607 let b: u64x2 = b.as_u64x2();
        let k1: i64x2 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
        let r: i64x2 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x2::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
32617 _ => k1,
32618 };
32619 simd_bitmask(r)
32620 }
32621}
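
// Illustrative sketch only, not part of the stdarch API: predicates 3
// (`_MM_CMPINT_FALSE`) and 7 (`_MM_CMPINT_TRUE`) ignore the vector inputs;
// the former always yields an empty mask and the latter returns `k1`
// restricted to the two 64-bit lanes of a 128-bit vector. The helper name
// `false_true_predicate_demo` is an assumption made for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn false_true_predicate_demo(k1: __mmask8, a: __m128i, b: __m128i) -> (__mmask8, __mmask8) {
    let always_false = _mm_mask_cmp_epu64_mask::<_MM_CMPINT_FALSE>(k1, a, b); // always 0
    let always_true = _mm_mask_cmp_epu64_mask::<_MM_CMPINT_TRUE>(k1, a, b); // k1 & 0b11
    (always_false, always_true)
}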
32622
32623/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32624///
32625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi64_mask&expand=1037)
32626#[inline]
32627#[target_feature(enable = "avx512f")]
32628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32629#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32630pub fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) }
32632}
32633
32634/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32635///
32636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi64_mask&expand=1038)
32637#[inline]
32638#[target_feature(enable = "avx512f")]
32639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32640#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32641pub fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32642 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32643}
32644
32645/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32646///
32647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi64_mask&expand=1035)
32648#[inline]
32649#[target_feature(enable = "avx512f,avx512vl")]
32650#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32651#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32652pub fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_i64x4(), b.as_i64x4())) }
32654}
32655
32656/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32657///
32658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi64_mask&expand=1036)
32659#[inline]
32660#[target_feature(enable = "avx512f,avx512vl")]
32661#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32662#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32663pub fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32664 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32665}
32666
32667/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32668///
32669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi64_mask&expand=1033)
32670#[inline]
32671#[target_feature(enable = "avx512f,avx512vl")]
32672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32673#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32674pub fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_i64x2(), b.as_i64x2())) }
32676}
32677
32678/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32679///
32680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi64_mask&expand=1034)
32681#[inline]
32682#[target_feature(enable = "avx512f,avx512vl")]
32683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32684#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32685pub fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32686 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32687}
32688
32689/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32690///
32691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi64_mask&expand=913)
32692#[inline]
32693#[target_feature(enable = "avx512f")]
32694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32695#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32696pub fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) }
32698}
32699
32700/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32701///
32702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi64_mask&expand=914)
32703#[inline]
32704#[target_feature(enable = "avx512f")]
32705#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32706#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32707pub fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32708 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32709}
32710
32711/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32712///
32713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64_mask&expand=911)
32714#[inline]
32715#[target_feature(enable = "avx512f,avx512vl")]
32716#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32717#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32718pub fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
32720}
32721
32722/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32723///
32724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi64_mask&expand=912)
32725#[inline]
32726#[target_feature(enable = "avx512f,avx512vl")]
32727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32728#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32729pub fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32730 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32731}
32732
32733/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32734///
32735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64_mask&expand=909)
32736#[inline]
32737#[target_feature(enable = "avx512f,avx512vl")]
32738#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32739#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32740pub fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_i64x2(), b.as_i64x2())) }
32742}
32743
32744/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32745///
32746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi64_mask&expand=910)
32747#[inline]
32748#[target_feature(enable = "avx512f,avx512vl")]
32749#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32750#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32751pub fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32752 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32753}
32754
32755/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32756///
32757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi64_mask&expand=977)
32758#[inline]
32759#[target_feature(enable = "avx512f")]
32760#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32761#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32762pub fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8())) }
32764}
32765
32766/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32767///
32768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi64_mask&expand=978)
32769#[inline]
32770#[target_feature(enable = "avx512f")]
32771#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32772#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32773pub fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32774 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32775}
32776
32777/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32778///
32779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi64_mask&expand=975)
32780#[inline]
32781#[target_feature(enable = "avx512f,avx512vl")]
32782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32783#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32784pub fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_i64x4(), b.as_i64x4())) }
32786}
32787
32788/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32789///
32790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi64_mask&expand=976)
32791#[inline]
32792#[target_feature(enable = "avx512f,avx512vl")]
32793#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32794#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32795pub fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32796 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32797}
32798
32799/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32800///
32801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi64_mask&expand=973)
32802#[inline]
32803#[target_feature(enable = "avx512f,avx512vl")]
32804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32805#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32806pub fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_i64x2(), b.as_i64x2())) }
32808}
32809
32810/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32811///
32812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi64_mask&expand=974)
32813#[inline]
32814#[target_feature(enable = "avx512f,avx512vl")]
32815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32816#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32817pub fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32818 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32819}
32820
32821/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32822///
32823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi64_mask&expand=855)
32824#[inline]
32825#[target_feature(enable = "avx512f")]
32826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32827#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32828pub fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8())) }
32830}
32831
32832/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32833///
32834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi64_mask&expand=856)
32835#[inline]
32836#[target_feature(enable = "avx512f")]
32837#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32838#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32839pub fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32840 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32841}
32842
32843/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32844///
32845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi64_mask&expand=853)
32846#[inline]
32847#[target_feature(enable = "avx512f,avx512vl")]
32848#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32849#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32850pub fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_i64x4(), b.as_i64x4())) }
32852}
32853
32854/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32855///
32856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi64_mask&expand=854)
32857#[inline]
32858#[target_feature(enable = "avx512f,avx512vl")]
32859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32860#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32861pub fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32862 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32863}
32864
32865/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32866///
32867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi64_mask&expand=851)
32868#[inline]
32869#[target_feature(enable = "avx512f,avx512vl")]
32870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32871#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32872pub fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_i64x2(), b.as_i64x2())) }
32874}
32875
32876/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32877///
32878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi64_mask&expand=852)
32879#[inline]
32880#[target_feature(enable = "avx512f,avx512vl")]
32881#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32882#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32883pub fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32884 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32885}
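
// Illustrative sketch only, not part of the stdarch API: composes two of the
// signed 64-bit comparisons above to count the lanes of `v` lying in the
// half-open range `[lo, hi)`. The helper name `count_in_range_epi64_demo` is
// an assumption made for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn count_in_range_epi64_demo(v: __m512i, lo: __m512i, hi: __m512i) -> u32 {
    // First build the `v >= lo` mask, then keep only the lanes where `v < hi`.
    let ge_lo = _mm512_cmpge_epi64_mask(v, lo);
    let in_range = _mm512_mask_cmplt_epi64_mask(ge_lo, v, hi);
    in_range.count_ones()
}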
32886
32887/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32888///
32889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi64_mask&expand=787)
32890#[inline]
32891#[target_feature(enable = "avx512f")]
32892#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32893#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32894pub fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) }
32896}
32897
32898/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32899///
32900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi64_mask&expand=788)
32901#[inline]
32902#[target_feature(enable = "avx512f")]
32903#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32904#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32905pub fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32906 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32907}
32908
32909/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32910///
32911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64_mask&expand=785)
32912#[inline]
32913#[target_feature(enable = "avx512f,avx512vl")]
32914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32915#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32916pub fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
32918}
32919
32920/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32921///
32922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi64_mask&expand=786)
32923#[inline]
32924#[target_feature(enable = "avx512f,avx512vl")]
32925#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32926#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32927pub fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32928 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32929}
32930
32931/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32932///
32933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64_mask&expand=783)
32934#[inline]
32935#[target_feature(enable = "avx512f,avx512vl")]
32936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32937#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32938pub fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_i64x2(), b.as_i64x2())) }
32940}
32941
32942/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32943///
32944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi64_mask&expand=784)
32945#[inline]
32946#[target_feature(enable = "avx512f,avx512vl")]
32947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32948#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32949pub fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32950 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32951}
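
// Illustrative sketch only, not part of the stdarch API: reduces the lane-wise
// equality mask above to a single bool answering "are the two vectors
// identical when viewed as 64-bit lanes?". The helper name
// `vectors_equal_epi64_demo` is an assumption made for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn vectors_equal_epi64_demo(a: __m128i, b: __m128i) -> bool {
    // A 128-bit vector holds two 64-bit lanes, so a full match sets mask 0b11.
    _mm_cmpeq_epi64_mask(a, b) == 0b11
}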
32952
32953/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32954///
32955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi64_mask&expand=1094)
32956#[inline]
32957#[target_feature(enable = "avx512f")]
32958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32959#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32960pub fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) }
32962}
32963
32964/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32965///
32966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi64_mask&expand=1095)
32967#[inline]
32968#[target_feature(enable = "avx512f")]
32969#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32970#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32971pub fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32972 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32973}
32974
32975/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32976///
32977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi64_mask&expand=1092)
32978#[inline]
32979#[target_feature(enable = "avx512f,avx512vl")]
32980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32981#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32982pub fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_i64x4(), b.as_i64x4())) }
32984}
32985
32986/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32987///
32988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi64_mask&expand=1093)
32989#[inline]
32990#[target_feature(enable = "avx512f,avx512vl")]
32991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32992#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32993pub fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32994 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32995}
32996
32997/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32998///
32999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi64_mask&expand=1090)
33000#[inline]
33001#[target_feature(enable = "avx512f,avx512vl")]
33002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33003#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
33004pub fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_i64x2(), b.as_i64x2())) }
33006}
33007
33008/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33009///
33010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi64_mask&expand=1091)
33011#[inline]
33012#[target_feature(enable = "avx512f,avx512vl")]
33013#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33014#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
33015pub fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33016 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
33017}
33018
33019/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33020///
33021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi64_mask&expand=703)
33022#[inline]
33023#[target_feature(enable = "avx512f")]
33024#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33025#[rustc_legacy_const_generics(2)]
33026#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33027pub fn _mm512_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
33028 unsafe {
33029 static_assert_uimm_bits!(IMM3, 3);
33030 let a: i64x8 = a.as_i64x8();
33031 let b: i64x8 = b.as_i64x8();
33032 let r: i64x8 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
33040 _ => i64x8::splat(-1),
33041 };
33042 simd_bitmask(r)
33043 }
33044}
33045
33046/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33047///
33048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi64_mask&expand=704)
33049#[inline]
33050#[target_feature(enable = "avx512f")]
33051#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33052#[rustc_legacy_const_generics(3)]
33053#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33054pub fn _mm512_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33055 k1: __mmask8,
33056 a: __m512i,
33057 b: __m512i,
33058) -> __mmask8 {
33059 unsafe {
33060 static_assert_uimm_bits!(IMM3, 3);
33061 let a: i64x8 = a.as_i64x8();
33062 let b: i64x8 = b.as_i64x8();
        let k1: i64x8 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
        let r: i64x8 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
33072 _ => k1,
33073 };
33074 simd_bitmask(r)
33075 }
33076}
33077
33078/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33079///
33080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi64_mask&expand=701)
33081#[inline]
33082#[target_feature(enable = "avx512f,avx512vl")]
33083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33084#[rustc_legacy_const_generics(2)]
33085#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33086pub fn _mm256_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
33087 unsafe {
33088 static_assert_uimm_bits!(IMM3, 3);
33089 let a: i64x4 = a.as_i64x4();
33090 let b: i64x4 = b.as_i64x4();
33091 let r: i64x4 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
33099 _ => i64x4::splat(-1),
33100 };
33101 simd_bitmask(r)
33102 }
33103}
33104
33105/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33106///
33107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi64_mask&expand=702)
33108#[inline]
33109#[target_feature(enable = "avx512f,avx512vl")]
33110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33111#[rustc_legacy_const_generics(3)]
33112#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33113pub fn _mm256_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33114 k1: __mmask8,
33115 a: __m256i,
33116 b: __m256i,
33117) -> __mmask8 {
33118 unsafe {
33119 static_assert_uimm_bits!(IMM3, 3);
33120 let a: i64x4 = a.as_i64x4();
33121 let b: i64x4 = b.as_i64x4();
        let k1: i64x4 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
        let r: i64x4 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
33131 _ => k1,
33132 };
33133 simd_bitmask(r)
33134 }
33135}
33136
33137/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33138///
33139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi64_mask&expand=699)
33140#[inline]
33141#[target_feature(enable = "avx512f,avx512vl")]
33142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33143#[rustc_legacy_const_generics(2)]
33144#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33145pub fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
33146 unsafe {
33147 static_assert_uimm_bits!(IMM3, 3);
33148 let a: i64x2 = a.as_i64x2();
33149 let b: i64x2 = b.as_i64x2();
33150 let r: i64x2 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x2::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
33158 _ => i64x2::splat(-1),
33159 };
33160 simd_bitmask(r)
33161 }
33162}
33163
33164/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33165///
33166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi64_mask&expand=700)
33167#[inline]
33168#[target_feature(enable = "avx512f,avx512vl")]
33169#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33170#[rustc_legacy_const_generics(3)]
33171#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33172pub fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33173 k1: __mmask8,
33174 a: __m128i,
33175 b: __m128i,
33176) -> __mmask8 {
33177 unsafe {
33178 static_assert_uimm_bits!(IMM3, 3);
33179 let a: i64x2 = a.as_i64x2();
33180 let b: i64x2 = b.as_i64x2();
        let k1: i64x2 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
        let r: i64x2 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x2::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
33190 _ => k1,
33191 };
33192 simd_bitmask(r)
33193 }
33194}
33195
33196/// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a.
33197///
33198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi32&expand=4556)
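///
/// A minimal, illustrative sketch (not compiled here); `_mm512_set1_epi32` is used
/// only to build an input vector:
///
/// ```ignore
/// // Sixteen lanes of 3 sum to 48.
/// let a = _mm512_set1_epi32(3);
/// assert_eq!(_mm512_reduce_add_epi32(a), 48);
/// ```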
33199#[inline]
33200#[target_feature(enable = "avx512f")]
33201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33202pub fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
33203 unsafe { simd_reduce_add_unordered(a.as_i32x16()) }
33204}
33205
33206/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
33207///
33208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi32&expand=4555)
33209#[inline]
33210#[target_feature(enable = "avx512f")]
33211#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33212pub fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
33214}
33215
33216/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
33217///
33218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi64&expand=4558)
33219#[inline]
33220#[target_feature(enable = "avx512f")]
33221#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33222pub fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
33223 unsafe { simd_reduce_add_unordered(a.as_i64x8()) }
33224}
33225
33226/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
33227///
33228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi64&expand=4557)
33229#[inline]
33230#[target_feature(enable = "avx512f")]
33231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33232pub fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
33234}
33235
33236/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
33237///
33238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_ps&expand=4562)
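///
/// A minimal, illustrative sketch (not compiled here). Note that the reduction is
/// computed as a tree of pairwise additions rather than a strict left-to-right sum,
/// so for inputs where rounding matters the result can differ slightly from a
/// sequential accumulation:
///
/// ```ignore
/// let a = _mm512_set1_ps(0.5);
/// assert_eq!(_mm512_reduce_add_ps(a), 8.0);
/// ```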
33239#[inline]
33240#[target_feature(enable = "avx512f")]
33241#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33242pub fn _mm512_reduce_add_ps(a: __m512) -> f32 {
33243 unsafe {
33244 // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
33245 let a: __m256 = _mm256_add_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a: __m128 = _mm_add_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a: __m128 = _mm_add_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        simd_extract::<_, f32>(a, 0) + simd_extract::<_, f32>(a, 1)
33252 }
33253}
33254
33255/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
33256///
33257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_ps&expand=4561)
33258#[inline]
33259#[target_feature(enable = "avx512f")]
33260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33261pub fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
    unsafe { _mm512_reduce_add_ps(simd_select_bitmask(k, a, _mm512_setzero_ps())) }
33263}
33264
33265/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
33266///
33267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_pd&expand=4560)
33268#[inline]
33269#[target_feature(enable = "avx512f")]
33270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33271pub fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
33272 unsafe {
33273 let a: __m256d = _mm256_add_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a: __m128d = _mm_add_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        simd_extract::<_, f64>(a, 0) + simd_extract::<_, f64>(a, 1)
33279 }
33280}
33281
33282/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
33283///
33284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_pd&expand=4559)
33285#[inline]
33286#[target_feature(enable = "avx512f")]
33287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33288pub fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
    unsafe { _mm512_reduce_add_pd(simd_select_bitmask(k, a, _mm512_setzero_pd())) }
33290}
33291
33292/// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a.
33293///
33294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi32&expand=4600)
33295#[inline]
33296#[target_feature(enable = "avx512f")]
33297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33298pub fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
33299 unsafe { simd_reduce_mul_unordered(a.as_i32x16()) }
33300}
33301
33302/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
33303///
33304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi32&expand=4599)
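///
/// A minimal, illustrative sketch (not compiled here): lanes whose mask bit is clear
/// contribute the multiplicative identity `1`, so they do not affect the product.
///
/// ```ignore
/// let a = _mm512_set1_epi32(2);
/// // Only the low four lanes are active: 2 * 2 * 2 * 2 = 16.
/// assert_eq!(_mm512_mask_reduce_mul_epi32(0b0000_0000_0000_1111, a), 16);
/// ```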
33305#[inline]
33306#[target_feature(enable = "avx512f")]
33307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33308pub fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
33309 unsafe {
33310 simd_reduce_mul_unordered(simd_select_bitmask(
            k,
            a.as_i32x16(),
            _mm512_set1_epi32(1).as_i32x16(),
33314 ))
33315 }
33316}
33317
33318/// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a.
33319///
33320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi64&expand=4602)
33321#[inline]
33322#[target_feature(enable = "avx512f")]
33323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33324pub fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
33325 unsafe { simd_reduce_mul_unordered(a.as_i64x8()) }
33326}
33327
33328/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
33329///
33330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi64&expand=4601)
33331#[inline]
33332#[target_feature(enable = "avx512f")]
33333#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33334pub fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
33335 unsafe {
33336 simd_reduce_mul_unordered(simd_select_bitmask(
            k,
            a.as_i64x8(),
            _mm512_set1_epi64(1).as_i64x8(),
33340 ))
33341 }
33342}
33343
33344/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
33345///
33346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_ps&expand=4606)
33347#[inline]
33348#[target_feature(enable = "avx512f")]
33349#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33350pub fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
33351 unsafe {
33352 // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
33353 let a: __m256 = _mm256_mul_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a: __m128 = _mm_mul_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a: __m128 = _mm_mul_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        simd_extract::<_, f32>(a, 0) * simd_extract::<_, f32>(a, 1)
33360 }
33361}
33362
33363/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
33364///
33365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_ps&expand=4605)
33366#[inline]
33367#[target_feature(enable = "avx512f")]
33368#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33369pub fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
    unsafe { _mm512_reduce_mul_ps(simd_select_bitmask(k, a, _mm512_set1_ps(1.))) }
33371}
33372
33373/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
33374///
33375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_pd&expand=4604)
33376#[inline]
33377#[target_feature(enable = "avx512f")]
33378#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33379pub fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
33380 unsafe {
33381 let a: __m256d = _mm256_mul_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a: __m128d = _mm_mul_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        simd_extract::<_, f64>(a, 0) * simd_extract::<_, f64>(a, 1)
33387 }
33388}
33389
33390/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
33391///
33392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_pd&expand=4603)
33393#[inline]
33394#[target_feature(enable = "avx512f")]
33395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33396pub fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
    unsafe { _mm512_reduce_mul_pd(simd_select_bitmask(k, a, _mm512_set1_pd(1.))) }
33398}
33399
33400/// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a.
33401///
33402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi32&expand=4576)
33403#[inline]
33404#[target_feature(enable = "avx512f")]
33405#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33406pub fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
33407 unsafe { simd_reduce_max(a.as_i32x16()) }
33408}
33409
33410/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33411///
33412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi32&expand=4575)
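///
/// A minimal, illustrative sketch (not compiled here): lanes whose mask bit is clear
/// contribute `i32::MIN`, so an all-zero mask yields `i32::MIN`.
///
/// ```ignore
/// let a = _mm512_set1_epi32(7);
/// assert_eq!(_mm512_mask_reduce_max_epi32(0b0000_0000_0000_0011, a), 7);
/// assert_eq!(_mm512_mask_reduce_max_epi32(0, a), i32::MIN);
/// ```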
33413#[inline]
33414#[target_feature(enable = "avx512f")]
33415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33416pub fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
33417 unsafe {
33418 simd_reduce_max(simd_select_bitmask(
            k,
            a.as_i32x16(),
            i32x16::splat(i32::MIN),
33422 ))
33423 }
33424}
33425
33426/// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a.
33427///
33428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi64&expand=4578)
33429#[inline]
33430#[target_feature(enable = "avx512f")]
33431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33432pub fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
33433 unsafe { simd_reduce_max(a.as_i64x8()) }
33434}
33435
33436/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33437///
33438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi64&expand=4577)
33439#[inline]
33440#[target_feature(enable = "avx512f")]
33441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33442pub fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN))) }
33444}
33445
33446/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
33447///
33448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu32&expand=4580)
33449#[inline]
33450#[target_feature(enable = "avx512f")]
33451#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33452pub fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
33453 unsafe { simd_reduce_max(a.as_u32x16()) }
33454}
33455
33456/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33457///
33458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu32&expand=4579)
33459#[inline]
33460#[target_feature(enable = "avx512f")]
33461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33462pub fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u32x16(), u32x16::ZERO)) }
33464}
33465
33466/// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a.
33467///
33468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu64&expand=4582)
33469#[inline]
33470#[target_feature(enable = "avx512f")]
33471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33472pub fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
33473 unsafe { simd_reduce_max(a.as_u64x8()) }
33474}
33475
33476/// Reduce the packed unsigned 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33477///
33478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu64&expand=4581)
33479#[inline]
33480#[target_feature(enable = "avx512f")]
33481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33482pub fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u64x8(), u64x8::ZERO)) }
33484}
33485
33486/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
33487///
33488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_ps&expand=4586)
33489#[inline]
33490#[target_feature(enable = "avx512f")]
33491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33492pub fn _mm512_reduce_max_ps(a: __m512) -> f32 {
33493 unsafe {
33494 let a: __m256 = _mm256_max_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a: __m128 = _mm_max_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a: __m128 = _mm_max_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        _mm_cvtss_f32(_mm_max_ss(a, _mm_movehdup_ps(a)))
33501 }
33502}
33503
33504/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
33505///
33506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_ps&expand=4585)
33507#[inline]
33508#[target_feature(enable = "avx512f")]
33509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33510pub fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 {
    _mm512_reduce_max_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MIN), k, a))
33512}
33513
33514/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
33515///
33516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_pd&expand=4584)
33517#[inline]
33518#[target_feature(enable = "avx512f")]
33519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33520pub fn _mm512_reduce_max_pd(a: __m512d) -> f64 {
33521 unsafe {
33522 let a: __m256d = _mm256_max_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a: __m128d = _mm_max_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        _mm_cvtsd_f64(_mm_max_sd(a, simd_shuffle!(a, a, [1, 0])))
33528 }
33529}
33530
33531/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
33532///
33533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_pd&expand=4583)
33534#[inline]
33535#[target_feature(enable = "avx512f")]
33536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33537pub fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
    _mm512_reduce_max_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MIN), k, a))
33539}
33540
33541/// Reduce the packed signed 32-bit integers in a by minimum. Returns the minimum of all elements in a.
33542///
33543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi32&expand=4588)
33544#[inline]
33545#[target_feature(enable = "avx512f")]
33546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33547pub fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
33548 unsafe { simd_reduce_min(a.as_i32x16()) }
33549}
33550
/// Reduce the packed signed 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33552///
33553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi32&expand=4587)
33554#[inline]
33555#[target_feature(enable = "avx512f")]
33556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33557pub fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
33558 unsafe {
33559 simd_reduce_min(simd_select_bitmask(
            k,
            a.as_i32x16(),
            i32x16::splat(i32::MAX),
33563 ))
33564 }
33565}
33566
33567/// Reduce the packed signed 64-bit integers in a by minimum. Returns the minimum of all elements in a.
33568///
33569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi64&expand=4590)
33570#[inline]
33571#[target_feature(enable = "avx512f")]
33572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33573pub fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
33574 unsafe { simd_reduce_min(a.as_i64x8()) }
33575}
33576
/// Reduce the packed signed 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33578///
33579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi64&expand=4589)
33580#[inline]
33581#[target_feature(enable = "avx512f")]
33582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33583pub fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MAX))) }
33585}
33586
33587/// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a.
33588///
33589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu32&expand=4592)
33590#[inline]
33591#[target_feature(enable = "avx512f")]
33592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33593pub fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
33594 unsafe { simd_reduce_min(a.as_u32x16()) }
33595}
33596
/// Reduce the packed unsigned 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33598///
33599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu32&expand=4591)
33600#[inline]
33601#[target_feature(enable = "avx512f")]
33602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33603pub fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
33604 unsafe {
33605 simd_reduce_min(simd_select_bitmask(
            k,
            a.as_u32x16(),
            u32x16::splat(u32::MAX),
33609 ))
33610 }
33611}
33612
33613/// Reduce the packed unsigned 64-bit integers in a by minimum. Returns the minimum of all elements in a.
33614///
33615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu64&expand=4594)
33616#[inline]
33617#[target_feature(enable = "avx512f")]
33618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33619pub fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
33620 unsafe { simd_reduce_min(a.as_u64x8()) }
33621}
33622
/// Reduce the packed unsigned 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33624///
33625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu64&expand=4589)
33626#[inline]
33627#[target_feature(enable = "avx512f")]
33628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33629pub fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u64x8(), u64x8::splat(u64::MAX))) }
33631}
33632
33633/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
33634///
33635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_ps&expand=4598)
33636#[inline]
33637#[target_feature(enable = "avx512f")]
33638#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33639pub fn _mm512_reduce_min_ps(a: __m512) -> f32 {
33640 unsafe {
33641 let a: __m256 = _mm256_min_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a: __m128 = _mm_min_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a: __m128 = _mm_min_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        _mm_cvtss_f32(_mm_min_ss(a, _mm_movehdup_ps(a)))
33648 }
33649}
33650
/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
33652///
33653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_ps&expand=4597)
33654#[inline]
33655#[target_feature(enable = "avx512f")]
33656#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33657pub fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 {
    _mm512_reduce_min_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MAX), k, a))
33659}
33660
33661/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
33662///
33663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_pd&expand=4596)
33664#[inline]
33665#[target_feature(enable = "avx512f")]
33666#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33667pub fn _mm512_reduce_min_pd(a: __m512d) -> f64 {
33668 unsafe {
33669 let a: __m256d = _mm256_min_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a: __m128d = _mm_min_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        _mm_cvtsd_f64(_mm_min_sd(a, simd_shuffle!(a, a, [1, 0])))
33675 }
33676}
33677
/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
33679///
33680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_pd&expand=4595)
33681#[inline]
33682#[target_feature(enable = "avx512f")]
33683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33684pub fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
    _mm512_reduce_min_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MAX), k, a))
33686}
33687
33688/// Reduce the packed 32-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
33689///
33690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi32&expand=4564)
33691#[inline]
33692#[target_feature(enable = "avx512f")]
33693#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33694pub fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
33695 unsafe { simd_reduce_and(a.as_i32x16()) }
33696}
33697
33698/// Reduce the packed 32-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
33699///
33700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi32&expand=4563)
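///
/// A minimal, illustrative sketch (not compiled here): lanes whose mask bit is clear
/// contribute `-1` (all bits set), the identity for bitwise AND.
///
/// ```ignore
/// let a = _mm512_set1_epi32(0b1010);
/// assert_eq!(_mm512_mask_reduce_and_epi32(0b0000_0000_0000_0011, a), 0b1010);
/// ```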
33701#[inline]
33702#[target_feature(enable = "avx512f")]
33703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33704pub fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i32x16(), i32x16::splat(-1))) }
33706}
33707
33708/// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
33709///
33710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi64&expand=4566)
33711#[inline]
33712#[target_feature(enable = "avx512f")]
33713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33714pub fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
33715 unsafe { simd_reduce_and(a.as_i64x8()) }
33716}
33717
/// Reduce the packed 64-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
33719///
33720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi64&expand=4557)
33721#[inline]
33722#[target_feature(enable = "avx512f")]
33723#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33724pub fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(-1))) }
33726}
33727
33728/// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
33729///
33730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi32&expand=4608)
33731#[inline]
33732#[target_feature(enable = "avx512f")]
33733#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33734pub fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
33735 unsafe { simd_reduce_or(a.as_i32x16()) }
33736}
33737
33738/// Reduce the packed 32-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
33739///
33740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi32&expand=4607)
33741#[inline]
33742#[target_feature(enable = "avx512f")]
33743#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33744pub fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
33746}
33747
33748/// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
33749///
33750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi64&expand=4610)
33751#[inline]
33752#[target_feature(enable = "avx512f")]
33753#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33754pub fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
33755 unsafe { simd_reduce_or(a.as_i64x8()) }
33756}
33757
33758/// Reduce the packed 64-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
33759///
33760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi64&expand=4609)
33761#[inline]
33762#[target_feature(enable = "avx512f")]
33763#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33764pub fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
33766}
33767
33768/// Returns vector of type `__m512d` with indeterminate elements.
33769/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33770/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33771/// In practice, this is typically equivalent to [`mem::zeroed`].
33772///
33773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_pd)
33774#[inline]
33775#[target_feature(enable = "avx512f")]
33776#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33777// This intrinsic has no corresponding instruction.
33778pub fn _mm512_undefined_pd() -> __m512d {
33779 unsafe { const { mem::zeroed() } }
33780}
33781
33782/// Returns vector of type `__m512` with indeterminate elements.
33783/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33784/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33785/// In practice, this is typically equivalent to [`mem::zeroed`].
33786///
33787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_ps)
33788#[inline]
33789#[target_feature(enable = "avx512f")]
33790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33791// This intrinsic has no corresponding instruction.
33792pub fn _mm512_undefined_ps() -> __m512 {
33793 unsafe { const { mem::zeroed() } }
33794}
33795
/// Returns vector of type `__m512i` with indeterminate elements.
33797/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33798/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33799/// In practice, this is typically equivalent to [`mem::zeroed`].
33800///
33801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_epi32&expand=5995)
33802#[inline]
33803#[target_feature(enable = "avx512f")]
33804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33805// This intrinsic has no corresponding instruction.
33806pub fn _mm512_undefined_epi32() -> __m512i {
33807 unsafe { const { mem::zeroed() } }
33808}
33809
/// Returns vector of type `__m512` with indeterminate elements.
33811/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33812/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33813/// In practice, this is typically equivalent to [`mem::zeroed`].
33814///
33815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined&expand=5994)
33816#[inline]
33817#[target_feature(enable = "avx512f")]
33818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33819// This intrinsic has no corresponding instruction.
33820pub fn _mm512_undefined() -> __m512 {
33821 unsafe { const { mem::zeroed() } }
33822}
33823
33824/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33825///
33826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi32&expand=3377)
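///
/// A minimal, illustrative sketch (not compiled here): the pointer must be valid for
/// reading 64 bytes, but no particular alignment is required.
///
/// ```ignore
/// let data = [1i32; 16];
/// let v = unsafe { _mm512_loadu_epi32(data.as_ptr()) };
/// ```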
33827#[inline]
33828#[target_feature(enable = "avx512f")]
33829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33830#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33831pub unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
33833}
33834
33835/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33836///
33837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi32&expand=3374)
33838#[inline]
33839#[target_feature(enable = "avx512f,avx512vl")]
33840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33841#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33842pub unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
33844}
33845
33846/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33847///
33848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi32&expand=3371)
33849#[inline]
33850#[target_feature(enable = "avx512f,avx512vl")]
33851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33852#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33853pub unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
33855}
33856
33857/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33858///
33859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi16&expand=1460)
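///
/// A minimal, illustrative sketch (not compiled here): element `j` of `a` is truncated
/// to 16 bits and written at byte offset `2 * j` only if bit `j` of `k` is set; memory
/// at positions whose mask bit is clear is left untouched.
///
/// ```ignore
/// let mut out = [0i16; 16];
/// let a = _mm512_set1_epi32(0x0001_0042);
/// // Write only the low eight lanes; each stores the truncated value 0x0042.
/// unsafe { _mm512_mask_cvtepi32_storeu_epi16(out.as_mut_ptr(), 0x00FF, a) };
/// ```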
33860#[inline]
33861#[target_feature(enable = "avx512f")]
33862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33863#[cfg_attr(test, assert_instr(vpmovdw))]
33864pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
    vpmovdwmem(mem_addr.cast(), a.as_i32x16(), k);
33866}
33867
33868/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33869///
33870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi16&expand=1462)
33871#[inline]
33872#[target_feature(enable = "avx512f,avx512vl")]
33873#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33874#[cfg_attr(test, assert_instr(vpmovdw))]
33875pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovdwmem256(mem_addr.cast(), a.as_i32x8(), k);
33877}
33878
33879/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33880///
33881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi16&expand=1461)
33882#[inline]
33883#[target_feature(enable = "avx512f,avx512vl")]
33884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33885#[cfg_attr(test, assert_instr(vpmovdw))]
33886pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovdwmem128(mem_addr.cast(), a.as_i32x4(), k);
33888}
33889
33890/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33891///
33892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi16&expand=1833)
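///
/// A minimal, illustrative sketch (not compiled here) of how saturation differs from
/// plain truncation: values outside the `i16` range clamp to `i16::MIN`/`i16::MAX`
/// instead of having their upper bits discarded.
///
/// ```ignore
/// let mut out = [0i16; 16];
/// let a = _mm512_set1_epi32(70_000);
/// unsafe { _mm512_mask_cvtsepi32_storeu_epi16(out.as_mut_ptr(), 0x0001, a) };
/// assert_eq!(out[0], i16::MAX); // plain truncation would instead give 4_464
/// ```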
33893#[inline]
33894#[target_feature(enable = "avx512f")]
33895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33896#[cfg_attr(test, assert_instr(vpmovsdw))]
33897pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
    vpmovsdwmem(mem_addr.cast(), a.as_i32x16(), k);
33899}
33900
33901/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33902///
33903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi16&expand=1832)
33904#[inline]
33905#[target_feature(enable = "avx512f,avx512vl")]
33906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33907#[cfg_attr(test, assert_instr(vpmovsdw))]
33908pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovsdwmem256(mem_addr.cast(), a.as_i32x8(), k);
33910}
33911
33912/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33913///
33914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi16&expand=1831)
33915#[inline]
33916#[target_feature(enable = "avx512f,avx512vl")]
33917#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33918#[cfg_attr(test, assert_instr(vpmovsdw))]
33919pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovsdwmem128(mem_addr.cast(), a.as_i32x4(), k);
33921}
33922
33923/// Convert packed unsigned 32-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33924///
33925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi16&expand=2068)
33926#[inline]
33927#[target_feature(enable = "avx512f")]
33928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33929#[cfg_attr(test, assert_instr(vpmovusdw))]
33930pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
    vpmovusdwmem(mem_addr.cast(), a.as_i32x16(), k);
33932}
33933
33934/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33935///
33936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi16&expand=2067)
33937#[inline]
33938#[target_feature(enable = "avx512f,avx512vl")]
33939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33940#[cfg_attr(test, assert_instr(vpmovusdw))]
33941pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovusdwmem256(mem_addr.cast(), a.as_i32x8(), k);
33943}
33944
33945/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33946///
33947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi16&expand=2066)
33948#[inline]
33949#[target_feature(enable = "avx512f,avx512vl")]
33950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33951#[cfg_attr(test, assert_instr(vpmovusdw))]
33952pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovusdwmem128(mem_addr.cast(), a.as_i32x4(), k);
33954}
33955
33956/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33957///
33958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi8&expand=1463)
33959#[inline]
33960#[target_feature(enable = "avx512f")]
33961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33962#[cfg_attr(test, assert_instr(vpmovdb))]
33963pub unsafe fn _mm512_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovdbmem(mem_addr, a.as_i32x16(), k);
33965}
33966
33967/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33968///
33969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi8&expand=1462)
33970#[inline]
33971#[target_feature(enable = "avx512f,avx512vl")]
33972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33973#[cfg_attr(test, assert_instr(vpmovdb))]
33974pub unsafe fn _mm256_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovdbmem256(mem_addr, a.as_i32x8(), k);
33976}
33977
33978/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33979///
33980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi8&expand=1461)
33981#[inline]
33982#[target_feature(enable = "avx512f,avx512vl")]
33983#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33984#[cfg_attr(test, assert_instr(vpmovdb))]
33985pub unsafe fn _mm_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovdbmem128(mem_addr, a.as_i32x4(), k);
33987}
33988
33989/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33990///
33991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi8&expand=1836)
33992#[inline]
33993#[target_feature(enable = "avx512f")]
33994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33995#[cfg_attr(test, assert_instr(vpmovsdb))]
33996pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovsdbmem(mem_addr, a.as_i32x16(), k);
33998}
33999
34000/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34001///
34002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi8&expand=1835)
34003#[inline]
34004#[target_feature(enable = "avx512f,avx512vl")]
34005#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34006#[cfg_attr(test, assert_instr(vpmovsdb))]
34007pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsdbmem256(mem_addr, a.as_i32x8(), k);
34009}
34010
34011/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34012///
34013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi8&expand=1834)
34014#[inline]
34015#[target_feature(enable = "avx512f,avx512vl")]
34016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34017#[cfg_attr(test, assert_instr(vpmovsdb))]
34018pub unsafe fn _mm_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsdbmem128(mem_addr, a.as_i32x4(), k);
34020}
34021
34022/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34023///
34024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi8&expand=2071)
34025#[inline]
34026#[target_feature(enable = "avx512f")]
34027#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34028#[cfg_attr(test, assert_instr(vpmovusdb))]
34029pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovusdbmem(mem_addr, a.as_i32x16(), k);
34031}
34032
34033/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34034///
34035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi8&expand=2070)
34036#[inline]
34037#[target_feature(enable = "avx512f,avx512vl")]
34038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34039#[cfg_attr(test, assert_instr(vpmovusdb))]
34040pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusdbmem256(mem_addr, a.as_i32x8(), k);
34042}
34043
34044/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34045///
34046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi8&expand=2069)
34047#[inline]
34048#[target_feature(enable = "avx512f,avx512vl")]
34049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34050#[cfg_attr(test, assert_instr(vpmovusdb))]
34051pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusdbmem128(mem_addr, a.as_i32x4(), k);
34053}
34054
34055/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34056///
34057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi16&expand=1513)
34058#[inline]
34059#[target_feature(enable = "avx512f")]
34060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34061#[cfg_attr(test, assert_instr(vpmovqw))]
34062pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
    vpmovqwmem(mem_addr.cast(), a.as_i64x8(), k);
34064}
34065
34066/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34067///
34068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi16&expand=1512)
34069#[inline]
34070#[target_feature(enable = "avx512f,avx512vl")]
34071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34072#[cfg_attr(test, assert_instr(vpmovqw))]
34073pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovqwmem256(mem_addr.cast(), a.as_i64x4(), k);
34075}
34076
34077/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34078///
34079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi16&expand=1511)
34080#[inline]
34081#[target_feature(enable = "avx512f,avx512vl")]
34082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34083#[cfg_attr(test, assert_instr(vpmovqw))]
34084pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovqwmem128(mem_addr.cast(), a.as_i64x2(), k);
34086}
34087
34088/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34089///
34090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi16&expand=1866)
34091#[inline]
34092#[target_feature(enable = "avx512f")]
34093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34094#[cfg_attr(test, assert_instr(vpmovsqw))]
34095pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
    vpmovsqwmem(mem_addr.cast(), a.as_i64x8(), k);
34097}
34098
34099/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34100///
34101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi16&expand=1865)
34102#[inline]
34103#[target_feature(enable = "avx512f,avx512vl")]
34104#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34105#[cfg_attr(test, assert_instr(vpmovsqw))]
34106pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
34107 vpmovsqwmem256(mem_addr.cast(), a.as_i64x4(), mask:k);
34108}
34109
34110/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34111///
34112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi16&expand=1864)
34113#[inline]
34114#[target_feature(enable = "avx512f,avx512vl")]
34115#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34116#[cfg_attr(test, assert_instr(vpmovsqw))]
34117pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
34118 vpmovsqwmem128(mem_addr.cast(), a.as_i64x2(), mask:k);
34119}
34120
34121/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34122///
34123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=2101)
34124#[inline]
34125#[target_feature(enable = "avx512f")]
34126#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34127#[cfg_attr(test, assert_instr(vpmovusqw))]
34128pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
34129 vpmovusqwmem(mem_addr.cast(), a.as_i64x8(), mask:k);
34130}
34131
34132/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34133///
34134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi16&expand=2100)
34135#[inline]
34136#[target_feature(enable = "avx512f,avx512vl")]
34137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34138#[cfg_attr(test, assert_instr(vpmovusqw))]
34139pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
34140 vpmovusqwmem256(mem_addr.cast(), a.as_i64x4(), mask:k);
34141}
34142
34143/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34144///
34145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi16&expand=2099)
34146#[inline]
34147#[target_feature(enable = "avx512f,avx512vl")]
34148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34149#[cfg_attr(test, assert_instr(vpmovusqw))]
34150pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
34151 vpmovusqwmem128(mem_addr.cast(), a.as_i64x2(), mask:k);
34152}
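
// A minimal usage sketch (not part of the stdarch test suite): shows how the masked
// 64-to-16-bit down-converting stores above write only the lanes selected by the mask.
// The lane values and the 0b0000_1111 mask are illustrative assumptions.
#[cfg(test)]
#[allow(dead_code)]
fn example_mask_cvtusepi64_storeu_epi16() {
    unsafe {
        // Lane i holds i, except lane 3 which holds a value that saturates to u16::MAX.
        let a = _mm512_set_epi64(7, 6, 5, 4, 100_000, 2, 1, 0);
        let mut out = [0i16; 8];
        // Only lanes 0..=3 are written; elements 4..=7 of `out` keep their old contents.
        _mm512_mask_cvtusepi64_storeu_epi16(out.as_mut_ptr(), 0b0000_1111, a);
        // out == [0, 1, 2, -1 /* u16::MAX reinterpreted as i16 */, 0, 0, 0, 0]
    }
}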

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi8&expand=1519)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm512_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovqbmem(mem_addr, a.as_i64x8(), k);
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi8&expand=1518)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm256_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovqbmem256(mem_addr, a.as_i64x4(), k);
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi8&expand=1517)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovqbmem128(mem_addr, a.as_i64x2(), k);
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi8&expand=1872)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovsqbmem(mem_addr, a.as_i64x8(), k);
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi8&expand=1871)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsqbmem256(mem_addr, a.as_i64x4(), k);
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi8&expand=1870)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsqbmem128(mem_addr, a.as_i64x2(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=2107)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovusqbmem(mem_addr, a.as_i64x8(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi8&expand=2106)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusqbmem256(mem_addr, a.as_i64x4(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi8&expand=2105)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusqbmem128(mem_addr, a.as_i64x2(), k);
}
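
// A minimal usage sketch (not part of the stdarch test suite): the 64-to-8-bit masked
// stores above touch at most eight bytes, one per selected lane, and leave the rest of
// the buffer alone. Buffer size, values, and mask are illustrative assumptions.
#[cfg(test)]
#[allow(dead_code)]
fn example_mask_cvtsepi64_storeu_epi8() {
    unsafe {
        // 300 does not fit in an i8, so signed saturation clamps it to i8::MAX.
        let a = _mm512_set1_epi64(300);
        let mut out = [0i8; 16];
        _mm512_mask_cvtsepi64_storeu_epi8(out.as_mut_ptr(), 0b0000_0011, a);
        // out[0] == 127, out[1] == 127; out[2..] is untouched (still 0).
    }
}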

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi32&expand=1516)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
    vpmovqdmem(mem_addr.cast(), a.as_i64x8(), k);
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi32&expand=1515)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
    vpmovqdmem256(mem_addr.cast(), a.as_i64x4(), k);
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi32&expand=1514)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
    vpmovqdmem128(mem_addr.cast(), a.as_i64x2(), k);
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi32&expand=1869)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
    vpmovsqdmem(mem_addr.cast(), a.as_i64x8(), k);
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi32&expand=1868)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
    vpmovsqdmem256(mem_addr.cast(), a.as_i64x4(), k);
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi32&expand=1867)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
    vpmovsqdmem128(mem_addr.cast(), a.as_i64x2(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=2104)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
    vpmovusqdmem(mem_addr.cast(), a.as_i64x8(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi32&expand=2103)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
    vpmovusqdmem256(mem_addr.cast(), a.as_i64x4(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi32&expand=2102)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
    vpmovusqdmem128(mem_addr.cast(), a.as_i64x2(), k);
}
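
// A minimal usage sketch (not part of the stdarch test suite): contrasts the truncating
// and the signed-saturating 64-to-32-bit masked stores defined above. The value and the
// all-lanes mask are illustrative assumptions.
#[cfg(test)]
#[allow(dead_code)]
fn example_cvtepi64_storeu_epi32_truncate_vs_saturate() {
    unsafe {
        let a = _mm512_set1_epi64(i64::MAX);
        let mut truncated = [0i32; 8];
        let mut saturated = [0i32; 8];
        // Truncation keeps the low 32 bits: i64::MAX becomes -1.
        _mm512_mask_cvtepi64_storeu_epi32(truncated.as_mut_ptr(), 0xff, a);
        // Signed saturation clamps to the i32 range: i64::MAX becomes i32::MAX.
        _mm512_mask_cvtsepi64_storeu_epi32(saturated.as_mut_ptr(), 0xff, a);
        // truncated == [-1; 8], saturated == [i32::MAX; 8]
    }
}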

/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi32&expand=5628)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
pub unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi32&expand=5626)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
pub unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi32&expand=5624)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
pub unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}

/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi64&expand=3386)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
pub unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi64&expand=3383)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
pub unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi64&expand=3380)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
pub unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}

/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi64&expand=5634)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
pub unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi64&expand=5632)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
pub unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi64&expand=5630)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
pub unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}

/// Load 512-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_si512&expand=3420)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
pub unsafe fn _mm512_loadu_si512(mem_addr: *const __m512i) -> __m512i {
    ptr::read_unaligned(mem_addr)
}

/// Store 512-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_si512&expand=5657)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
pub unsafe fn _mm512_storeu_si512(mem_addr: *mut __m512i, a: __m512i) {
    ptr::write_unaligned(mem_addr, a);
}
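
// A minimal usage sketch (not part of the stdarch test suite): round-trips a 512-bit
// vector through ordinary memory with the unaligned `loadu`/`storeu` pair above. The
// array contents are arbitrary illustrative values.
#[cfg(test)]
#[allow(dead_code)]
fn example_unaligned_si512_roundtrip() {
    unsafe {
        let src = [7i32; 16];
        // Only the alignment of `i32` is required; no 64-byte alignment is needed.
        let v = _mm512_loadu_si512(src.as_ptr() as *const __m512i);
        let mut dst = [0i32; 16];
        _mm512_storeu_si512(dst.as_mut_ptr() as *mut __m512i, v);
        // dst == src
    }
}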

/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
/// floating-point elements) from memory into result.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
    ptr::read_unaligned(mem_addr as *const __m512d)
}

/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
    ptr::write_unaligned(mem_addr as *mut __m512d, a);
}

/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
/// floating-point elements) from memory into result.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
    ptr::read_unaligned(mem_addr as *const __m512)
}

/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
    ptr::write_unaligned(mem_addr as *mut __m512, a);
}
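
// A minimal usage sketch (not part of the stdarch test suite): the floating-point
// `loadu`/`storeu` pair above behaves like the integer one, working through raw `f32`
// pointers with no extra alignment requirement. Values are illustrative.
#[cfg(test)]
#[allow(dead_code)]
fn example_unaligned_ps_roundtrip() {
    unsafe {
        let src = [1.5f32; 16];
        let v = _mm512_loadu_ps(src.as_ptr());
        let mut dst = [0.0f32; 16];
        _mm512_storeu_ps(dst.as_mut_ptr(), v);
        // dst == src
    }
}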

/// Load 512-bits of integer data from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_si512&expand=3345)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm512_load_si512(mem_addr: *const __m512i) -> __m512i {
    ptr::read(mem_addr)
}

/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_si512&expand=5598)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) {
    ptr::write(mem_addr, a);
}

/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi32&expand=3304)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
    ptr::read(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi32&expand=3301)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
    ptr::read(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi32&expand=3298)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
    ptr::read(mem_addr as *const __m128i)
}

/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi32&expand=5569)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
    ptr::write(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi32&expand=5567)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
    ptr::write(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi32&expand=5565)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
    ptr::write(mem_addr as *mut __m128i, a);
}
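
// A minimal usage sketch (not part of the stdarch test suite): the aligned `load`/`store`
// intrinsics above require 64-byte (512-bit), 32-byte (256-bit) or 16-byte (128-bit)
// aligned pointers. A `#[repr(align(64))]` wrapper is one way to guarantee that; the
// wrapper type name is an illustrative assumption.
#[cfg(test)]
#[allow(dead_code)]
fn example_aligned_epi32_roundtrip() {
    #[repr(align(64))]
    struct Aligned64([i32; 16]);

    unsafe {
        let mut buf = Aligned64([0; 16]);
        let v = _mm512_set1_epi32(42);
        // Both calls rely on `buf` being 64-byte aligned.
        _mm512_store_epi32(buf.0.as_mut_ptr(), v);
        let w = _mm512_load_epi32(buf.0.as_ptr());
        let _ = w;
    }
}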

/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi64&expand=3313)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa64
pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
    ptr::read(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi64&expand=3310)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa64
pub unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
    ptr::read(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi64&expand=3307)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa64
pub unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
    ptr::read(mem_addr as *const __m128i)
}

/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi64&expand=5575)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa64
pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
    ptr::write(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi64&expand=5573)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa64
pub unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
    ptr::write(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi64&expand=5571)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa64
pub unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
    ptr::write(mem_addr as *mut __m128i, a);
}

/// Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_ps&expand=3336)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)]
pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
    ptr::read(mem_addr as *const __m512)
}

/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_ps&expand=5592)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)]
pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
    ptr::write(mem_addr as *mut __m512, a);
}

/// Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_pd&expand=3326)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovapd
pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
    ptr::read(mem_addr as *const __m512d)
}

/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_pd&expand=5585)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovapd
pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
    ptr::write(mem_addr as *mut __m512d, a);
}

/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
    transmute(loaddqu32_512(mem_addr, src.as_i32x16(), k))
}

/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    _mm512_mask_loadu_epi32(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
    transmute(loaddqu64_512(mem_addr, src.as_i64x8(), k))
}

/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    _mm512_mask_loadu_epi64(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
    transmute(loadups_512(mem_addr, src.as_f32x16(), k))
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    _mm512_mask_loadu_ps(_mm512_setzero_ps(), k, mem_addr)
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
    transmute(loadupd_512(mem_addr, src.as_f64x8(), k))
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    _mm512_mask_loadu_pd(_mm512_setzero_pd(), k, mem_addr)
}
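
// A minimal usage sketch (not part of the stdarch test suite): contrasts the writemask
// and zeromask forms of the unaligned masked loads above. The data and the 0b0000_1111
// mask are illustrative assumptions.
#[cfg(test)]
#[allow(dead_code)]
fn example_mask_vs_maskz_loadu_pd() {
    unsafe {
        let mem = [1.0f64; 8];
        let src = _mm512_set1_pd(-1.0);
        // Inactive lanes are copied from `src` ...
        let a = _mm512_mask_loadu_pd(src, 0b0000_1111, mem.as_ptr());
        // ... or zeroed, depending on which form is used.
        let b = _mm512_maskz_loadu_pd(0b0000_1111, mem.as_ptr());
        // a == [1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0]
        // b == [1.0, 1.0, 1.0, 1.0,  0.0,  0.0,  0.0,  0.0]
        let _ = (a, b);
    }
}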

/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
    transmute(loaddqu32_256(mem_addr, src.as_i32x8(), k))
}

/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    _mm256_mask_loadu_epi32(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
    transmute(loaddqu64_256(mem_addr, src.as_i64x4(), k))
}

/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    _mm256_mask_loadu_epi64(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    transmute(loadups_256(mem_addr, src.as_f32x8(), k))
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    _mm256_mask_loadu_ps(_mm256_setzero_ps(), k, mem_addr)
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
    transmute(loadupd_256(mem_addr, src.as_f64x4(), k))
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
    _mm256_mask_loadu_pd(_mm256_setzero_pd(), k, mem_addr)
}

/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
    transmute(loaddqu32_128(mem_addr, src.as_i32x4(), k))
}

/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    _mm_mask_loadu_epi32(_mm_setzero_si128(), k, mem_addr)
}

/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
    transmute(loaddqu64_128(mem_addr, src.as_i64x2(), k))
}

/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    _mm_mask_loadu_epi64(_mm_setzero_si128(), k, mem_addr)
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    transmute(loadups_128(mem_addr, src.as_f32x4(), k))
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    _mm_mask_loadu_ps(_mm_setzero_ps(), k, mem_addr)
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    transmute(loadupd_128(mem_addr, src.as_f64x2(), k))
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    _mm_mask_loadu_pd(_mm_setzero_pd(), k, mem_addr)
}
35088
35089/// Load packed 32-bit integers from memory into dst using writemask k
35090/// (elements are copied from src when the corresponding mask bit is not set).
35091/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35092///
35093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi32)
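///
/// # Examples
///
/// A usage sketch (illustrative only); the `Aligned` wrapper is a hypothetical
/// helper used here just to satisfy the 64-byte alignment requirement, and the
/// code assumes `avx512f` support has been verified at runtime.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # #[target_feature(enable = "avx512f")]
/// # fn example() {
/// # use core::arch::x86_64::*;
/// // Guarantee the 64-byte alignment that the aligned load requires.
/// #[repr(align(64))]
/// struct Aligned([i32; 16]);
///
/// let data = Aligned([7; 16]);
/// let src = _mm512_set1_epi32(-1);
/// // Mask 0x00FF: the low 8 lanes are loaded from memory, the high 8 lanes keep `src`.
/// let v = unsafe { _mm512_mask_load_epi32(src, 0x00FF, data.0.as_ptr()) };
/// # let _ = v;
/// # }
/// ```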
35094#[inline]
35095#[target_feature(enable = "avx512f")]
35096#[cfg_attr(test, assert_instr(vmovdqa32))]
35097#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35098pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
    transmute(loaddqa32_512(mem_addr, src.as_i32x16(), k))
35100}
35101
35102/// Load packed 32-bit integers from memory into dst using zeromask k
35103/// (elements are zeroed out when the corresponding mask bit is not set).
35104/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35105///
35106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi32)
35107#[inline]
35108#[target_feature(enable = "avx512f")]
35109#[cfg_attr(test, assert_instr(vmovdqa32))]
35110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35111pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    _mm512_mask_load_epi32(_mm512_setzero_si512(), k, mem_addr)
35113}
35114
35115/// Load packed 64-bit integers from memory into dst using writemask k
35116/// (elements are copied from src when the corresponding mask bit is not set).
35117/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35118///
35119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi64)
35120#[inline]
35121#[target_feature(enable = "avx512f")]
35122#[cfg_attr(test, assert_instr(vmovdqa64))]
35123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35124pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
    transmute(loaddqa64_512(mem_addr, src.as_i64x8(), k))
35126}
35127
35128/// Load packed 64-bit integers from memory into dst using zeromask k
35129/// (elements are zeroed out when the corresponding mask bit is not set).
35130/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35131///
35132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi64)
35133#[inline]
35134#[target_feature(enable = "avx512f")]
35135#[cfg_attr(test, assert_instr(vmovdqa64))]
35136#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35137pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    _mm512_mask_load_epi64(_mm512_setzero_si512(), k, mem_addr)
35139}
35140
35141/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35142/// (elements are copied from src when the corresponding mask bit is not set).
35143/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35144///
35145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_ps)
35146#[inline]
35147#[target_feature(enable = "avx512f")]
35148#[cfg_attr(test, assert_instr(vmovaps))]
35149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35150pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
    transmute(loadaps_512(mem_addr, src.as_f32x16(), k))
35152}
35153
35154/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35155/// (elements are zeroed out when the corresponding mask bit is not set).
35156/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35157///
35158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_ps)
35159#[inline]
35160#[target_feature(enable = "avx512f")]
35161#[cfg_attr(test, assert_instr(vmovaps))]
35162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35163pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    _mm512_mask_load_ps(_mm512_setzero_ps(), k, mem_addr)
35165}
35166
35167/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35168/// (elements are copied from src when the corresponding mask bit is not set).
35169/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35170///
35171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_pd)
35172#[inline]
35173#[target_feature(enable = "avx512f")]
35174#[cfg_attr(test, assert_instr(vmovapd))]
35175#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35176pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
    transmute(loadapd_512(mem_addr, src.as_f64x8(), k))
35178}
35179
35180/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35181/// (elements are zeroed out when the corresponding mask bit is not set).
35182/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35183///
35184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_pd)
35185#[inline]
35186#[target_feature(enable = "avx512f")]
35187#[cfg_attr(test, assert_instr(vmovapd))]
35188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35189pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    _mm512_mask_load_pd(_mm512_setzero_pd(), k, mem_addr)
35191}
35192
35193/// Load packed 32-bit integers from memory into dst using writemask k
35194/// (elements are copied from src when the corresponding mask bit is not set).
35195/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35196///
35197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi32)
35198#[inline]
35199#[target_feature(enable = "avx512f,avx512vl")]
35200#[cfg_attr(test, assert_instr(vmovdqa32))]
35201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35202pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
    transmute(loaddqa32_256(mem_addr, src.as_i32x8(), k))
35204}
35205
35206/// Load packed 32-bit integers from memory into dst using zeromask k
35207/// (elements are zeroed out when the corresponding mask bit is not set).
35208/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35209///
35210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi32)
35211#[inline]
35212#[target_feature(enable = "avx512f,avx512vl")]
35213#[cfg_attr(test, assert_instr(vmovdqa32))]
35214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35215pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    _mm256_mask_load_epi32(_mm256_setzero_si256(), k, mem_addr)
35217}
35218
35219/// Load packed 64-bit integers from memory into dst using writemask k
35220/// (elements are copied from src when the corresponding mask bit is not set).
35221/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35222///
35223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi64)
35224#[inline]
35225#[target_feature(enable = "avx512f,avx512vl")]
35226#[cfg_attr(test, assert_instr(vmovdqa64))]
35227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35228pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
    transmute(loaddqa64_256(mem_addr, src.as_i64x4(), k))
35230}
35231
35232/// Load packed 64-bit integers from memory into dst using zeromask k
35233/// (elements are zeroed out when the corresponding mask bit is not set).
35234/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35235///
35236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi64)
35237#[inline]
35238#[target_feature(enable = "avx512f,avx512vl")]
35239#[cfg_attr(test, assert_instr(vmovdqa64))]
35240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35241pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    _mm256_mask_load_epi64(_mm256_setzero_si256(), k, mem_addr)
35243}
35244
35245/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35246/// (elements are copied from src when the corresponding mask bit is not set).
35247/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35248///
35249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_ps)
35250#[inline]
35251#[target_feature(enable = "avx512f,avx512vl")]
35252#[cfg_attr(test, assert_instr(vmovaps))]
35253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35254pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    transmute(loadaps_256(mem_addr, src.as_f32x8(), k))
35256}
35257
35258/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35259/// (elements are zeroed out when the corresponding mask bit is not set).
35260/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35261///
35262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_ps)
35263#[inline]
35264#[target_feature(enable = "avx512f,avx512vl")]
35265#[cfg_attr(test, assert_instr(vmovaps))]
35266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35267pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    _mm256_mask_load_ps(_mm256_setzero_ps(), k, mem_addr)
35269}
35270
35271/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35272/// (elements are copied from src when the corresponding mask bit is not set).
35273/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35274///
35275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_pd)
35276#[inline]
35277#[target_feature(enable = "avx512f,avx512vl")]
35278#[cfg_attr(test, assert_instr(vmovapd))]
35279#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35280pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
    transmute(loadapd_256(mem_addr, src.as_f64x4(), k))
35282}
35283
35284/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35285/// (elements are zeroed out when the corresponding mask bit is not set).
35286/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35287///
35288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_pd)
35289#[inline]
35290#[target_feature(enable = "avx512f,avx512vl")]
35291#[cfg_attr(test, assert_instr(vmovapd))]
35292#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35293pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
    _mm256_mask_load_pd(_mm256_setzero_pd(), k, mem_addr)
35295}
35296
35297/// Load packed 32-bit integers from memory into dst using writemask k
35298/// (elements are copied from src when the corresponding mask bit is not set).
35299/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35300///
35301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi32)
35302#[inline]
35303#[target_feature(enable = "avx512f,avx512vl")]
35304#[cfg_attr(test, assert_instr(vmovdqa32))]
35305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35306pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
    transmute(loaddqa32_128(mem_addr, src.as_i32x4(), k))
35308}
35309
35310/// Load packed 32-bit integers from memory into dst using zeromask k
35311/// (elements are zeroed out when the corresponding mask bit is not set).
35312/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35313///
35314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi32)
35315#[inline]
35316#[target_feature(enable = "avx512f,avx512vl")]
35317#[cfg_attr(test, assert_instr(vmovdqa32))]
35318#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35319pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    _mm_mask_load_epi32(_mm_setzero_si128(), k, mem_addr)
35321}
35322
35323/// Load packed 64-bit integers from memory into dst using writemask k
35324/// (elements are copied from src when the corresponding mask bit is not set).
35325/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35326///
35327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi64)
35328#[inline]
35329#[target_feature(enable = "avx512f,avx512vl")]
35330#[cfg_attr(test, assert_instr(vmovdqa64))]
35331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35332pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
    transmute(loaddqa64_128(mem_addr, src.as_i64x2(), k))
35334}
35335
35336/// Load packed 64-bit integers from memory into dst using zeromask k
35337/// (elements are zeroed out when the corresponding mask bit is not set).
35338/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35339///
35340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi64)
35341#[inline]
35342#[target_feature(enable = "avx512f,avx512vl")]
35343#[cfg_attr(test, assert_instr(vmovdqa64))]
35344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35345pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    _mm_mask_load_epi64(_mm_setzero_si128(), k, mem_addr)
35347}
35348
35349/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35350/// (elements are copied from src when the corresponding mask bit is not set).
35351/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35352///
35353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_ps)
35354#[inline]
35355#[target_feature(enable = "avx512f,avx512vl")]
35356#[cfg_attr(test, assert_instr(vmovaps))]
35357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35358pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    transmute(loadaps_128(mem_addr, src.as_f32x4(), k))
35360}
35361
35362/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35363/// (elements are zeroed out when the corresponding mask bit is not set).
35364/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35365///
35366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_ps)
35367#[inline]
35368#[target_feature(enable = "avx512f,avx512vl")]
35369#[cfg_attr(test, assert_instr(vmovaps))]
35370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35371pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    _mm_mask_load_ps(_mm_setzero_ps(), k, mem_addr)
35373}
35374
35375/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35376/// (elements are copied from src when the corresponding mask bit is not set).
35377/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35378///
35379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_pd)
35380#[inline]
35381#[target_feature(enable = "avx512f,avx512vl")]
35382#[cfg_attr(test, assert_instr(vmovapd))]
35383#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35384pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    transmute(loadapd_128(mem_addr, src.as_f64x2(), k))
35386}
35387
35388/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35389/// (elements are zeroed out when the corresponding mask bit is not set).
35390/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35391///
35392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_pd)
35393#[inline]
35394#[target_feature(enable = "avx512f,avx512vl")]
35395#[cfg_attr(test, assert_instr(vmovapd))]
35396#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35397pub unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    _mm_mask_load_pd(_mm_setzero_pd(), k, mem_addr)
35399}
35400
35401/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
35402/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
35403/// 3 packed elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35404/// exception may be generated.
35405///
35406/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss)
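///
/// # Examples
///
/// An illustrative sketch (not part of the original documentation). The
/// `Aligned` wrapper is a hypothetical helper that provides the documented
/// 16-byte alignment, and `avx512f` support is assumed to have been checked.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # #[target_feature(enable = "avx512f")]
/// # fn example() {
/// # use core::arch::x86_64::*;
/// #[repr(align(16))]
/// struct Aligned(f32);
///
/// let data = Aligned(3.5);
/// let src = _mm_set1_ps(1.0);
/// // Mask bit 0 is set, so the low lane becomes 3.5; the upper three lanes are zeroed.
/// let v = unsafe { _mm_mask_load_ss(src, 0b1, &data.0) };
/// # let _ = v;
/// # }
/// ```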
35407#[inline]
35408#[cfg_attr(test, assert_instr(vmovss))]
35409#[target_feature(enable = "avx512f")]
35410#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35411pub unsafe fn _mm_mask_load_ss(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
35412 let mut dst: __m128 = src;
35413 asm!(
35414 vpl!("vmovss {dst}{{{k}}}"),
35415 p = in(reg) mem_addr,
35416 k = in(kreg) k,
35417 dst = inout(xmm_reg) dst,
35418 options(pure, readonly, nostack, preserves_flags),
35419 );
35420 dst
35421}
35422
35423/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
35424/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper 3 packed
35425/// elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35426/// exception may be generated.
35427///
35428/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss)
35429#[inline]
35430#[cfg_attr(test, assert_instr(vmovss))]
35431#[target_feature(enable = "avx512f")]
35432#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35433pub unsafe fn _mm_maskz_load_ss(k: __mmask8, mem_addr: *const f32) -> __m128 {
35434 let mut dst: __m128;
35435 asm!(
35436 vpl!("vmovss {dst}{{{k}}} {{z}}"),
35437 p = in(reg) mem_addr,
35438 k = in(kreg) k,
35439 dst = out(xmm_reg) dst,
35440 options(pure, readonly, nostack, preserves_flags),
35441 );
35442 dst
35443}
35444
35445/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
35446/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
35447/// element of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35448/// exception may be generated.
35449///
35450/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd)
35451#[inline]
35452#[cfg_attr(test, assert_instr(vmovsd))]
35453#[target_feature(enable = "avx512f")]
35454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35455pub unsafe fn _mm_mask_load_sd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
35456 let mut dst: __m128d = src;
35457 asm!(
35458 vpl!("vmovsd {dst}{{{k}}}"),
35459 p = in(reg) mem_addr,
35460 k = in(kreg) k,
35461 dst = inout(xmm_reg) dst,
35462 options(pure, readonly, nostack, preserves_flags),
35463 );
35464 dst
35465}
35466
35467/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
35468/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper element
35469/// of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection exception
35470/// may be generated.
35471///
35472/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd)
35473#[inline]
35474#[cfg_attr(test, assert_instr(vmovsd))]
35475#[target_feature(enable = "avx512f")]
35476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35477pub unsafe fn _mm_maskz_load_sd(k: __mmask8, mem_addr: *const f64) -> __m128d {
35478 let mut dst: __m128d;
35479 asm!(
35480 vpl!("vmovsd {dst}{{{k}}} {{z}}"),
35481 p = in(reg) mem_addr,
35482 k = in(kreg) k,
35483 dst = out(xmm_reg) dst,
35484 options(pure, readonly, nostack, preserves_flags),
35485 );
35486 dst
35487}
35488
35489/// Store packed 32-bit integers from a into memory using writemask k.
35490/// mem_addr does not need to be aligned on any particular boundary.
35491///
35492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi32)
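///
/// # Examples
///
/// A minimal sketch (illustrative only), assuming `avx512f` support has been
/// verified at runtime.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # #[target_feature(enable = "avx512f")]
/// # fn example() {
/// # use core::arch::x86_64::*;
/// let mut out = [0_i32; 16];
/// let a = _mm512_set1_epi32(5);
/// // Mask 0x000F: only the four lowest lanes are written; the rest of `out` is untouched.
/// unsafe { _mm512_mask_storeu_epi32(out.as_mut_ptr(), 0x000F, a) };
/// assert_eq!(&out[..4], &[5, 5, 5, 5]);
/// # }
/// ```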
35493#[inline]
35494#[target_feature(enable = "avx512f")]
35495#[cfg_attr(test, assert_instr(vmovdqu32))]
35496#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35497pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
35498 storedqu32_512(mem_addr, a.as_i32x16(), mask)
35499}
35500
35501/// Store packed 64-bit integers from a into memory using writemask k.
35502/// mem_addr does not need to be aligned on any particular boundary.
35503///
35504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi64)
35505#[inline]
35506#[target_feature(enable = "avx512f")]
35507#[cfg_attr(test, assert_instr(vmovdqu64))]
35508#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35509pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
35510 storedqu64_512(mem_addr, a.as_i64x8(), mask)
35511}
35512
35513/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35514/// mem_addr does not need to be aligned on any particular boundary.
35515///
35516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_ps)
35517#[inline]
35518#[target_feature(enable = "avx512f")]
35519#[cfg_attr(test, assert_instr(vmovups))]
35520#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35521pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
35522 storeups_512(mem_addr, a.as_f32x16(), mask)
35523}
35524
35525/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35526/// mem_addr does not need to be aligned on any particular boundary.
35527///
35528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_pd)
35529#[inline]
35530#[target_feature(enable = "avx512f")]
35531#[cfg_attr(test, assert_instr(vmovupd))]
35532#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35533pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
35534 storeupd_512(mem_addr, a.as_f64x8(), mask)
35535}
35536
35537/// Store packed 32-bit integers from a into memory using writemask k.
35538/// mem_addr does not need to be aligned on any particular boundary.
35539///
35540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi32)
35541#[inline]
35542#[target_feature(enable = "avx512f,avx512vl")]
35543#[cfg_attr(test, assert_instr(vmovdqu32))]
35544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35545pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
35546 storedqu32_256(mem_addr, a.as_i32x8(), mask)
35547}
35548
35549/// Store packed 64-bit integers from a into memory using writemask k.
35550/// mem_addr does not need to be aligned on any particular boundary.
35551///
35552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi64)
35553#[inline]
35554#[target_feature(enable = "avx512f,avx512vl")]
35555#[cfg_attr(test, assert_instr(vmovdqu64))]
35556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35557pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
35558 storedqu64_256(mem_addr, a.as_i64x4(), mask)
35559}
35560
35561/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35562/// mem_addr does not need to be aligned on any particular boundary.
35563///
35564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_ps)
35565#[inline]
35566#[target_feature(enable = "avx512f,avx512vl")]
35567#[cfg_attr(test, assert_instr(vmovups))]
35568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35569pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
35570 storeups_256(mem_addr, a.as_f32x8(), mask)
35571}
35572
35573/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35574/// mem_addr does not need to be aligned on any particular boundary.
35575///
35576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_pd)
35577#[inline]
35578#[target_feature(enable = "avx512f,avx512vl")]
35579#[cfg_attr(test, assert_instr(vmovupd))]
35580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35581pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
35582 storeupd_256(mem_addr, a.as_f64x4(), mask)
35583}
35584
35585/// Store packed 32-bit integers from a into memory using writemask k.
35586/// mem_addr does not need to be aligned on any particular boundary.
35587///
35588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi32)
35589#[inline]
35590#[target_feature(enable = "avx512f,avx512vl")]
35591#[cfg_attr(test, assert_instr(vmovdqu32))]
35592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35593pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
35594 storedqu32_128(mem_addr, a.as_i32x4(), mask)
35595}
35596
35597/// Store packed 64-bit integers from a into memory using writemask k.
35598/// mem_addr does not need to be aligned on any particular boundary.
35599///
35600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi64)
35601#[inline]
35602#[target_feature(enable = "avx512f,avx512vl")]
35603#[cfg_attr(test, assert_instr(vmovdqu64))]
35604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35605pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
35606 storedqu64_128(mem_addr, a.as_i64x2(), mask)
35607}
35608
35609/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35610/// mem_addr does not need to be aligned on any particular boundary.
35611///
35612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_ps)
35613#[inline]
35614#[target_feature(enable = "avx512f,avx512vl")]
35615#[cfg_attr(test, assert_instr(vmovups))]
35616#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35617pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
35618 storeups_128(mem_addr, a.as_f32x4(), mask)
35619}
35620
35621/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35622/// mem_addr does not need to be aligned on any particular boundary.
35623///
35624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_pd)
35625#[inline]
35626#[target_feature(enable = "avx512f,avx512vl")]
35627#[cfg_attr(test, assert_instr(vmovupd))]
35628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35629pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
35630 storeupd_128(mem_addr, a.as_f64x2(), mask)
35631}
35632
35633/// Store packed 32-bit integers from a into memory using writemask k.
35634/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35635///
35636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi32)
35637#[inline]
35638#[target_feature(enable = "avx512f")]
35639#[cfg_attr(test, assert_instr(vmovdqa32))]
35640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35641pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
35642 storedqa32_512(mem_addr, a.as_i32x16(), mask)
35643}
35644
35645/// Store packed 64-bit integers from a into memory using writemask k.
35646/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35647///
35648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi64)
35649#[inline]
35650#[target_feature(enable = "avx512f")]
35651#[cfg_attr(test, assert_instr(vmovdqa64))]
35652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35653pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
35654 storedqa64_512(mem_addr, a.as_i64x8(), mask)
35655}
35656
35657/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35658/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35659///
35660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_ps)
35661#[inline]
35662#[target_feature(enable = "avx512f")]
35663#[cfg_attr(test, assert_instr(vmovaps))]
35664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35665pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
35666 storeaps_512(mem_addr, a.as_f32x16(), mask)
35667}
35668
35669/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35670/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35671///
35672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_pd)
35673#[inline]
35674#[target_feature(enable = "avx512f")]
35675#[cfg_attr(test, assert_instr(vmovapd))]
35676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35677pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
35678 storeapd_512(mem_addr, a.as_f64x8(), mask)
35679}
35680
35681/// Store packed 32-bit integers from a into memory using writemask k.
35682/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35683///
35684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi32)
35685#[inline]
35686#[target_feature(enable = "avx512f,avx512vl")]
35687#[cfg_attr(test, assert_instr(vmovdqa32))]
35688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35689pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
35690 storedqa32_256(mem_addr, a.as_i32x8(), mask)
35691}
35692
35693/// Store packed 64-bit integers from a into memory using writemask k.
35694/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35695///
35696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi64)
35697#[inline]
35698#[target_feature(enable = "avx512f,avx512vl")]
35699#[cfg_attr(test, assert_instr(vmovdqa64))]
35700#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35701pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
35702 storedqa64_256(mem_addr, a.as_i64x4(), mask)
35703}
35704
35705/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35706/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35707///
35708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_ps)
35709#[inline]
35710#[target_feature(enable = "avx512f,avx512vl")]
35711#[cfg_attr(test, assert_instr(vmovaps))]
35712#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35713pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
35714 storeaps_256(mem_addr, a.as_f32x8(), mask)
35715}
35716
35717/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35718/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35719///
35720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_pd)
35721#[inline]
35722#[target_feature(enable = "avx512f,avx512vl")]
35723#[cfg_attr(test, assert_instr(vmovapd))]
35724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35725pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
35726 storeapd_256(mem_addr, a.as_f64x4(), mask)
35727}
35728
35729/// Store packed 32-bit integers from a into memory using writemask k.
35730/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35731///
35732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi32)
35733#[inline]
35734#[target_feature(enable = "avx512f,avx512vl")]
35735#[cfg_attr(test, assert_instr(vmovdqa32))]
35736#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35737pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
35738 storedqa32_128(mem_addr, a.as_i32x4(), mask)
35739}
35740
35741/// Store packed 64-bit integers from a into memory using writemask k.
35742/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35743///
35744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi64)
35745#[inline]
35746#[target_feature(enable = "avx512f,avx512vl")]
35747#[cfg_attr(test, assert_instr(vmovdqa64))]
35748#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35749pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
35750 storedqa64_128(mem_addr, a.as_i64x2(), mask)
35751}
35752
35753/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35754/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35755///
35756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ps)
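///
/// # Examples
///
/// A usage sketch (illustrative only); the `Aligned` wrapper is a hypothetical
/// helper providing the required 16-byte alignment, and `avx512f`/`avx512vl`
/// support is assumed to have been checked at runtime.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # #[target_feature(enable = "avx512f,avx512vl")]
/// # fn example() {
/// # use core::arch::x86_64::*;
/// #[repr(align(16))]
/// struct Aligned([f32; 4]);
///
/// let mut out = Aligned([0.0; 4]);
/// let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
/// // Mask 0b0101: lanes 0 and 2 are written (1.0 and 3.0); lanes 1 and 3 stay 0.0.
/// unsafe { _mm_mask_store_ps(out.0.as_mut_ptr(), 0b0101, a) };
/// assert_eq!(out.0, [1.0, 0.0, 3.0, 0.0]);
/// # }
/// ```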
35757#[inline]
35758#[target_feature(enable = "avx512f,avx512vl")]
35759#[cfg_attr(test, assert_instr(vmovaps))]
35760#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35761pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
35762 storeaps_128(mem_addr, a.as_f32x4(), mask)
35763}
35764
35765/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35766/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35767///
35768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_pd)
35769#[inline]
35770#[target_feature(enable = "avx512f,avx512vl")]
35771#[cfg_attr(test, assert_instr(vmovapd))]
35772#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35773pub unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
35774 storeapd_128(mem_addr, a.as_f64x2(), mask)
35775}
35776
35777/// Store a single-precision (32-bit) floating-point element from a into memory using writemask k. mem_addr
35778/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35779///
35780/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_ss)
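///
/// # Examples
///
/// An illustrative sketch (not part of the original documentation); the
/// `Aligned` wrapper is a hypothetical helper for the documented 16-byte
/// alignment, and `avx512f` support is assumed.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # #[target_feature(enable = "avx512f")]
/// # fn example() {
/// # use core::arch::x86_64::*;
/// #[repr(align(16))]
/// struct Aligned(f32);
///
/// let mut out = Aligned(0.0);
/// let a = _mm_set1_ps(2.5);
/// // Mask bit 0 is set, so the lowest lane of `a` is written to memory.
/// unsafe { _mm_mask_store_ss(&mut out.0, 0b1, a) };
/// assert_eq!(out.0, 2.5);
/// # }
/// ```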
35781#[inline]
35782#[cfg_attr(test, assert_instr(vmovss))]
35783#[target_feature(enable = "avx512f")]
35784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35785pub unsafe fn _mm_mask_store_ss(mem_addr: *mut f32, k: __mmask8, a: __m128) {
35786 asm!(
35787 vps!("vmovss", "{{{k}}}, {a}"),
35788 p = in(reg) mem_addr,
35789 k = in(kreg) k,
35790 a = in(xmm_reg) a,
35791 options(nostack, preserves_flags),
35792 );
35793}
35794
35795/// Store a double-precision (64-bit) floating-point element from a into memory using writemask k. mem_addr
35796/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35797///
35798/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sd)
35799#[inline]
35800#[cfg_attr(test, assert_instr(vmovsd))]
35801#[target_feature(enable = "avx512f")]
35802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35803pub unsafe fn _mm_mask_store_sd(mem_addr: *mut f64, k: __mmask8, a: __m128d) {
35804 asm!(
35805 vps!("vmovsd", "{{{k}}}, {a}"),
35806 p = in(reg) mem_addr,
35807 k = in(kreg) k,
35808 a = in(xmm_reg) a,
35809 options(nostack, preserves_flags),
35810 );
35811}
35812
35813/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35814///
35815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi32)
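///
/// # Examples
///
/// A sketch of how the expansion works (illustrative only, assuming `avx512f`
/// support has been checked at runtime): memory is read contiguously, and the
/// loaded values are placed only into the lanes whose mask bit is set.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # #[target_feature(enable = "avx512f")]
/// # fn example() {
/// # use core::arch::x86_64::*;
/// let data = [10, 20, 30, 40];
/// let src = _mm512_set1_epi32(-1);
/// // With mask 0b1010_1010, bits 1, 3, 5 and 7 are set, so the four contiguous
/// // values 10, 20, 30, 40 land in lanes 1, 3, 5 and 7; every other lane keeps `src`.
/// let v = unsafe { _mm512_mask_expandloadu_epi32(src, 0b1010_1010, data.as_ptr()) };
/// # let _ = v;
/// # }
/// ```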
35816#[inline]
35817#[target_feature(enable = "avx512f")]
35818#[cfg_attr(test, assert_instr(vpexpandd))]
35819#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35820pub unsafe fn _mm512_mask_expandloadu_epi32(
35821 src: __m512i,
35822 k: __mmask16,
35823 mem_addr: *const i32,
35824) -> __m512i {
    transmute(expandloadd_512(mem_addr, src.as_i32x16(), k))
35826}
35827
35828/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35829///
35830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi32)
35831#[inline]
35832#[target_feature(enable = "avx512f")]
35833#[cfg_attr(test, assert_instr(vpexpandd))]
35834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35835pub unsafe fn _mm512_maskz_expandloadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    _mm512_mask_expandloadu_epi32(_mm512_setzero_si512(), k, mem_addr)
35837}
35838
35839/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35840///
35841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi32)
35842#[inline]
35843#[target_feature(enable = "avx512f,avx512vl")]
35844#[cfg_attr(test, assert_instr(vpexpandd))]
35845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35846pub unsafe fn _mm256_mask_expandloadu_epi32(
35847 src: __m256i,
35848 k: __mmask8,
35849 mem_addr: *const i32,
35850) -> __m256i {
    transmute(expandloadd_256(mem_addr, src.as_i32x8(), k))
35852}
35853
35854/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35855///
35856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi32)
35857#[inline]
35858#[target_feature(enable = "avx512f,avx512vl")]
35859#[cfg_attr(test, assert_instr(vpexpandd))]
35860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35861pub unsafe fn _mm256_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    _mm256_mask_expandloadu_epi32(_mm256_setzero_si256(), k, mem_addr)
35863}
35864
35865/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35866///
35867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi32)
35868#[inline]
35869#[target_feature(enable = "avx512f,avx512vl")]
35870#[cfg_attr(test, assert_instr(vpexpandd))]
35871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35872pub unsafe fn _mm_mask_expandloadu_epi32(
35873 src: __m128i,
35874 k: __mmask8,
35875 mem_addr: *const i32,
35876) -> __m128i {
    transmute(expandloadd_128(mem_addr, src.as_i32x4(), k))
35878}
35879
35880/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35881///
35882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi32)
35883#[inline]
35884#[target_feature(enable = "avx512f,avx512vl")]
35885#[cfg_attr(test, assert_instr(vpexpandd))]
35886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35887pub unsafe fn _mm_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    _mm_mask_expandloadu_epi32(_mm_setzero_si128(), k, mem_addr)
35889}
35890
35891/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35892///
35893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi64)
35894#[inline]
35895#[target_feature(enable = "avx512f")]
35896#[cfg_attr(test, assert_instr(vpexpandq))]
35897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35898pub unsafe fn _mm512_mask_expandloadu_epi64(
35899 src: __m512i,
35900 k: __mmask8,
35901 mem_addr: *const i64,
35902) -> __m512i {
    transmute(expandloadq_512(mem_addr, src.as_i64x8(), k))
35904}
35905
35906/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35907///
35908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi64)
35909#[inline]
35910#[target_feature(enable = "avx512f")]
35911#[cfg_attr(test, assert_instr(vpexpandq))]
35912#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35913pub unsafe fn _mm512_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    _mm512_mask_expandloadu_epi64(_mm512_setzero_si512(), k, mem_addr)
35915}
35916
35917/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35918///
35919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi64)
35920#[inline]
35921#[target_feature(enable = "avx512f,avx512vl")]
35922#[cfg_attr(test, assert_instr(vpexpandq))]
35923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35924pub unsafe fn _mm256_mask_expandloadu_epi64(
35925 src: __m256i,
35926 k: __mmask8,
35927 mem_addr: *const i64,
35928) -> __m256i {
    transmute(expandloadq_256(mem_addr, src.as_i64x4(), k))
35930}
35931
35932/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35933///
35934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi64)
35935#[inline]
35936#[target_feature(enable = "avx512f,avx512vl")]
35937#[cfg_attr(test, assert_instr(vpexpandq))]
35938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35939pub unsafe fn _mm256_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    _mm256_mask_expandloadu_epi64(_mm256_setzero_si256(), k, mem_addr)
35941}
35942
35943/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35944///
35945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi64)
35946#[inline]
35947#[target_feature(enable = "avx512f,avx512vl")]
35948#[cfg_attr(test, assert_instr(vpexpandq))]
35949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35950pub unsafe fn _mm_mask_expandloadu_epi64(
35951 src: __m128i,
35952 k: __mmask8,
35953 mem_addr: *const i64,
35954) -> __m128i {
    transmute(expandloadq_128(mem_addr, src.as_i64x2(), k))
35956}
35957
35958/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35959///
35960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi64)
35961#[inline]
35962#[target_feature(enable = "avx512f,avx512vl")]
35963#[cfg_attr(test, assert_instr(vpexpandq))]
35964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35965pub unsafe fn _mm_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    _mm_mask_expandloadu_epi64(_mm_setzero_si128(), k, mem_addr)
35967}
35968
35969/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35970///
35971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_ps)
35972#[inline]
35973#[target_feature(enable = "avx512f")]
35974#[cfg_attr(test, assert_instr(vexpandps))]
35975#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35976pub unsafe fn _mm512_mask_expandloadu_ps(
35977 src: __m512,
35978 k: __mmask16,
35979 mem_addr: *const f32,
35980) -> __m512 {
    transmute(expandloadps_512(mem_addr, src.as_f32x16(), k))
35982}
35983
35984/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35985///
35986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_ps)
35987#[inline]
35988#[target_feature(enable = "avx512f")]
35989#[cfg_attr(test, assert_instr(vexpandps))]
35990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35991pub unsafe fn _mm512_maskz_expandloadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    _mm512_mask_expandloadu_ps(_mm512_setzero_ps(), k, mem_addr)
35993}
35994
35995/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35996///
35997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_ps)
35998#[inline]
35999#[target_feature(enable = "avx512f,avx512vl")]
36000#[cfg_attr(test, assert_instr(vexpandps))]
36001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36002pub unsafe fn _mm256_mask_expandloadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    transmute(expandloadps_256(mem_addr, src.as_f32x8(), k))
36004}
36005
36006/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36007///
36008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_ps)
36009#[inline]
36010#[target_feature(enable = "avx512f,avx512vl")]
36011#[cfg_attr(test, assert_instr(vexpandps))]
36012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36013pub unsafe fn _mm256_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    _mm256_mask_expandloadu_ps(_mm256_setzero_ps(), k, mem_addr)
36015}
36016
36017/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36018///
36019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_ps)
36020#[inline]
36021#[target_feature(enable = "avx512f,avx512vl")]
36022#[cfg_attr(test, assert_instr(vexpandps))]
36023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36024pub unsafe fn _mm_mask_expandloadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    transmute(expandloadps_128(mem_addr, src.as_f32x4(), k))
36026}
36027
36028/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36029///
36030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_ps)
36031#[inline]
36032#[target_feature(enable = "avx512f,avx512vl")]
36033#[cfg_attr(test, assert_instr(vexpandps))]
36034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36035pub unsafe fn _mm_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    _mm_mask_expandloadu_ps(_mm_setzero_ps(), k, mem_addr)
36037}
36038
36039/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36040///
36041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_pd)
36042#[inline]
36043#[target_feature(enable = "avx512f")]
36044#[cfg_attr(test, assert_instr(vexpandpd))]
36045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36046pub unsafe fn _mm512_mask_expandloadu_pd(
36047 src: __m512d,
36048 k: __mmask8,
36049 mem_addr: *const f64,
36050) -> __m512d {
    transmute(expandloadpd_512(mem_addr, src.as_f64x8(), k))
36052}
36053
36054/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36055///
36056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_pd)
36057#[inline]
36058#[target_feature(enable = "avx512f")]
36059#[cfg_attr(test, assert_instr(vexpandpd))]
36060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36061pub unsafe fn _mm512_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    _mm512_mask_expandloadu_pd(_mm512_setzero_pd(), k, mem_addr)
36063}
36064
36065/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36066///
36067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_pd)
36068#[inline]
36069#[target_feature(enable = "avx512f,avx512vl")]
36070#[cfg_attr(test, assert_instr(vexpandpd))]
36071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36072pub unsafe fn _mm256_mask_expandloadu_pd(
36073 src: __m256d,
36074 k: __mmask8,
36075 mem_addr: *const f64,
36076) -> __m256d {
    transmute(expandloadpd_256(mem_addr, src.as_f64x4(), k))
36078}
36079
36080/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36081///
36082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_pd)
36083#[inline]
36084#[target_feature(enable = "avx512f,avx512vl")]
36085#[cfg_attr(test, assert_instr(vexpandpd))]
36086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36087pub unsafe fn _mm256_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
36088    _mm256_mask_expandloadu_pd(_mm256_setzero_pd(), k, mem_addr)
36089}
36090
36091/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36092///
36093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_pd)
36094#[inline]
36095#[target_feature(enable = "avx512f,avx512vl")]
36096#[cfg_attr(test, assert_instr(vexpandpd))]
36097#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36098pub unsafe fn _mm_mask_expandloadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
36099    transmute(expandloadpd_128(mem_addr, src.as_f64x2(), k))
36100}
36101
36102/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36103///
36104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_pd)
36105#[inline]
36106#[target_feature(enable = "avx512f,avx512vl")]
36107#[cfg_attr(test, assert_instr(vexpandpd))]
36108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36109pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
36110    _mm_mask_expandloadu_pd(_mm_setzero_pd(), k, mem_addr)
36111}
36112
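// A minimal test-only sketch (not part of the intrinsic surface) of how the
// masked expand-loads above behave: active lanes consume consecutive elements
// from memory, in lane order, while inactive lanes keep `src`. The helper name
// is illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn expandloadu_pd_sketch() {
    let mem = [10.0f64, 20.0];
    let src = _mm_set_pd(2.0, 1.0); // lanes: [1.0, 2.0]
    // Only lane 1 is active, so it receives mem[0]; lane 0 is copied from `src`.
    let r = _mm_mask_expandloadu_pd(src, 0b10, mem.as_ptr());
    let mut out = [0.0f64; 2];
    _mm_storeu_pd(out.as_mut_ptr(), r);
    assert_eq!(out, [1.0, 10.0]);
}
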
36113/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values in reverse order.
36114///
36115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_pd&expand=5002)
36116#[inline]
36117#[target_feature(enable = "avx512f")]
36118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36119pub fn _mm512_setr_pd(
36120 e0: f64,
36121 e1: f64,
36122 e2: f64,
36123 e3: f64,
36124 e4: f64,
36125 e5: f64,
36126 e6: f64,
36127 e7: f64,
36128) -> __m512d {
36129 unsafe {
36130        let r: f64x8 = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
36131        transmute(r)
36132 }
36133}
36134
36135/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values.
36136///
36137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_pd&expand=4924)
36138#[inline]
36139#[target_feature(enable = "avx512f")]
36140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36141pub fn _mm512_set_pd(
36142 e0: f64,
36143 e1: f64,
36144 e2: f64,
36145 e3: f64,
36146 e4: f64,
36147 e5: f64,
36148 e6: f64,
36149 e7: f64,
36150) -> __m512d {
36151    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
36152}
36153
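// A small test-only sketch of the argument ordering: `_mm512_setr_pd` places its
// first argument in the lowest lane, while `_mm512_set_pd` takes the same values
// in reverse, so these two calls build the same vector. The helper name is ours.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn set_pd_ordering_sketch() {
    let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    let b = _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.);
    let (mut out_a, mut out_b) = ([0.0f64; 8], [0.0f64; 8]);
    _mm512_storeu_pd(out_a.as_mut_ptr(), a);
    _mm512_storeu_pd(out_b.as_mut_ptr(), b);
    assert_eq!(out_a, out_b);
    assert_eq!(out_a[0], 0.0); // lowest lane
}
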
36154/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36155///
36156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_ss&expand=3832)
36157#[inline]
36158#[target_feature(enable = "avx512f")]
36159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36160#[cfg_attr(test, assert_instr(vmovss))]
36161pub fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36162 unsafe {
36163 let extractsrc: f32 = simd_extract!(src, 0);
36164 let mut mov: f32 = extractsrc;
36165 if (k & 0b00000001) != 0 {
36166 mov = simd_extract!(b, 0);
36167 }
36168 simd_insert!(a, 0, mov)
36169 }
36170}
36171
36172/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36173///
36174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_ss&expand=3833)
36175#[inline]
36176#[target_feature(enable = "avx512f")]
36177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36178#[cfg_attr(test, assert_instr(vmovss))]
36179pub fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36180 unsafe {
36181 let mut mov: f32 = 0.;
36182 if (k & 0b00000001) != 0 {
36183 mov = simd_extract!(b, 0);
36184 }
36185 simd_insert!(a, 0, mov)
36186 }
36187}
36188
36189/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36190///
36191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_sd&expand=3829)
36192#[inline]
36193#[target_feature(enable = "avx512f")]
36194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36195#[cfg_attr(test, assert_instr(vmovsd))]
36196pub fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36197 unsafe {
36198 let extractsrc: f64 = simd_extract!(src, 0);
36199 let mut mov: f64 = extractsrc;
36200 if (k & 0b00000001) != 0 {
36201 mov = simd_extract!(b, 0);
36202 }
36203 simd_insert!(a, 0, mov)
36204 }
36205}
36206
36207/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36208///
36209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_sd&expand=3830)
36210#[inline]
36211#[target_feature(enable = "avx512f")]
36212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36213#[cfg_attr(test, assert_instr(vmovsd))]
36214pub fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36215 unsafe {
36216 let mut mov: f64 = 0.;
36217 if (k & 0b00000001) != 0 {
36218 mov = simd_extract!(b, 0);
36219 }
36220 simd_insert!(a, 0, mov)
36221 }
36222}
36223
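// Test-only sketch of the masked scalar moves above: the low lane comes from `b`
// when mask bit 0 is set, otherwise from `src` (writemask) or zero (zeromask);
// the upper lanes always come from `a`. Helper name is illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_move_ss_sketch() {
    let src = _mm_set_ps(0., 0., 0., 99.0);
    let a = _mm_set_ps(4., 3., 2., 1.0);
    let b = _mm_set_ps(40., 30., 20., 10.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_move_ss(src, 0b1, a, b)), 10.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_move_ss(src, 0b0, a, b)), 99.0);
    assert_eq!(_mm_cvtss_f32(_mm_maskz_move_ss(0b0, a, b)), 0.0);
}
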
36224/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36225///
36226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_ss&expand=159)
36227#[inline]
36228#[target_feature(enable = "avx512f")]
36229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36230#[cfg_attr(test, assert_instr(vaddss))]
36231pub fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36232 unsafe {
36233 let extractsrc: f32 = simd_extract!(src, 0);
36234 let mut add: f32 = extractsrc;
36235 if (k & 0b00000001) != 0 {
36236 let extracta: f32 = simd_extract!(a, 0);
36237 let extractb: f32 = simd_extract!(b, 0);
36238 add = extracta + extractb;
36239 }
36240 simd_insert!(a, 0, add)
36241 }
36242}
36243
36244/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36245///
36246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_ss&expand=160)
36247#[inline]
36248#[target_feature(enable = "avx512f")]
36249#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36250#[cfg_attr(test, assert_instr(vaddss))]
36251pub fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36252 unsafe {
36253 let mut add: f32 = 0.;
36254 if (k & 0b00000001) != 0 {
36255 let extracta: f32 = simd_extract!(a, 0);
36256 let extractb: f32 = simd_extract!(b, 0);
36257 add = extracta + extractb;
36258 }
36259 simd_insert!(a, 0, add)
36260 }
36261}
36262
36263/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36264///
36265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_sd&expand=155)
36266#[inline]
36267#[target_feature(enable = "avx512f")]
36268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36269#[cfg_attr(test, assert_instr(vaddsd))]
36270pub fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36271 unsafe {
36272 let extractsrc: f64 = simd_extract!(src, 0);
36273 let mut add: f64 = extractsrc;
36274 if (k & 0b00000001) != 0 {
36275 let extracta: f64 = simd_extract!(a, 0);
36276 let extractb: f64 = simd_extract!(b, 0);
36277 add = extracta + extractb;
36278 }
36279 simd_insert!(a, 0, add)
36280 }
36281}
36282
36283/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36284///
36285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_sd&expand=156)
36286#[inline]
36287#[target_feature(enable = "avx512f")]
36288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36289#[cfg_attr(test, assert_instr(vaddsd))]
36290pub fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36291 unsafe {
36292 let mut add: f64 = 0.;
36293 if (k & 0b00000001) != 0 {
36294 let extracta: f64 = simd_extract!(a, 0);
36295 let extractb: f64 = simd_extract!(b, 0);
36296 add = extracta + extractb;
36297 }
36298 simd_insert!(a, 0, add)
36299 }
36300}
36301
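// Test-only sketch of the masked scalar add: lane 0 holds a[0] + b[0] when mask
// bit 0 is set, otherwise src[0] (writemask) or 0.0 (zeromask). Helper name is
// illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_add_ss_sketch() {
    let src = _mm_set_ps(0., 0., 0., 100.0);
    let a = _mm_set_ps(0., 0., 0., 1.0);
    let b = _mm_set_ps(0., 0., 0., 10.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_add_ss(src, 0b1, a, b)), 11.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_add_ss(src, 0b0, a, b)), 100.0);
    assert_eq!(_mm_cvtss_f32(_mm_maskz_add_ss(0b0, a, b)), 0.0);
}
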
36302/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36303///
36304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_ss&expand=5750)
36305#[inline]
36306#[target_feature(enable = "avx512f")]
36307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36308#[cfg_attr(test, assert_instr(vsubss))]
36309pub fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36310 unsafe {
36311 let extractsrc: f32 = simd_extract!(src, 0);
36312 let mut add: f32 = extractsrc;
36313 if (k & 0b00000001) != 0 {
36314 let extracta: f32 = simd_extract!(a, 0);
36315 let extractb: f32 = simd_extract!(b, 0);
36316 add = extracta - extractb;
36317 }
36318 simd_insert!(a, 0, add)
36319 }
36320}
36321
36322/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36323///
36324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_ss&expand=5751)
36325#[inline]
36326#[target_feature(enable = "avx512f")]
36327#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36328#[cfg_attr(test, assert_instr(vsubss))]
36329pub fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36330 unsafe {
36331 let mut add: f32 = 0.;
36332 if (k & 0b00000001) != 0 {
36333 let extracta: f32 = simd_extract!(a, 0);
36334 let extractb: f32 = simd_extract!(b, 0);
36335 add = extracta - extractb;
36336 }
36337 simd_insert!(a, 0, add)
36338 }
36339}
36340
36341/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36342///
36343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_sd&expand=5746)
36344#[inline]
36345#[target_feature(enable = "avx512f")]
36346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36347#[cfg_attr(test, assert_instr(vsubsd))]
36348pub fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36349 unsafe {
36350 let extractsrc: f64 = simd_extract!(src, 0);
36351 let mut add: f64 = extractsrc;
36352 if (k & 0b00000001) != 0 {
36353 let extracta: f64 = simd_extract!(a, 0);
36354 let extractb: f64 = simd_extract!(b, 0);
36355 add = extracta - extractb;
36356 }
36357 simd_insert!(a, 0, add)
36358 }
36359}
36360
36361/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36362///
36363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_sd&expand=5747)
36364#[inline]
36365#[target_feature(enable = "avx512f")]
36366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36367#[cfg_attr(test, assert_instr(vsubsd))]
36368pub fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36369 unsafe {
36370 let mut add: f64 = 0.;
36371 if (k & 0b00000001) != 0 {
36372 let extracta: f64 = simd_extract!(a, 0);
36373 let extractb: f64 = simd_extract!(b, 0);
36374 add = extracta - extractb;
36375 }
36376 simd_insert!(a, 0, add)
36377 }
36378}
36379
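// Test-only sketch of the masked scalar subtract (double-precision variant):
// lane 0 holds a[0] - b[0] when mask bit 0 is set, otherwise src[0] or 0.0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_sub_sd_sketch() {
    let src = _mm_set_pd(0., 100.0);
    let a = _mm_set_pd(2., 7.0);
    let b = _mm_set_pd(20., 3.0);
    assert_eq!(_mm_cvtsd_f64(_mm_mask_sub_sd(src, 0b1, a, b)), 4.0);
    assert_eq!(_mm_cvtsd_f64(_mm_mask_sub_sd(src, 0b0, a, b)), 100.0);
    assert_eq!(_mm_cvtsd_f64(_mm_maskz_sub_sd(0b0, a, b)), 0.0);
}
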
36380/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36381///
36382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_ss&expand=3950)
36383#[inline]
36384#[target_feature(enable = "avx512f")]
36385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36386#[cfg_attr(test, assert_instr(vmulss))]
36387pub fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36388 unsafe {
36389 let extractsrc: f32 = simd_extract!(src, 0);
36390 let mut add: f32 = extractsrc;
36391 if (k & 0b00000001) != 0 {
36392 let extracta: f32 = simd_extract!(a, 0);
36393 let extractb: f32 = simd_extract!(b, 0);
36394 add = extracta * extractb;
36395 }
36396 simd_insert!(a, 0, add)
36397 }
36398}
36399
36400/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36401///
36402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_ss&expand=3951)
36403#[inline]
36404#[target_feature(enable = "avx512f")]
36405#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36406#[cfg_attr(test, assert_instr(vmulss))]
36407pub fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36408 unsafe {
36409 let mut add: f32 = 0.;
36410 if (k & 0b00000001) != 0 {
36411 let extracta: f32 = simd_extract!(a, 0);
36412 let extractb: f32 = simd_extract!(b, 0);
36413 add = extracta * extractb;
36414 }
36415 simd_insert!(a, 0, add)
36416 }
36417}
36418
36419/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36420///
36421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_sd&expand=3947)
36422#[inline]
36423#[target_feature(enable = "avx512f")]
36424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36425#[cfg_attr(test, assert_instr(vmulsd))]
36426pub fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36427 unsafe {
36428 let extractsrc: f64 = simd_extract!(src, 0);
36429 let mut add: f64 = extractsrc;
36430 if (k & 0b00000001) != 0 {
36431 let extracta: f64 = simd_extract!(a, 0);
36432 let extractb: f64 = simd_extract!(b, 0);
36433 add = extracta * extractb;
36434 }
36435 simd_insert!(a, 0, add)
36436 }
36437}
36438
36439/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36440///
36441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_sd&expand=3948)
36442#[inline]
36443#[target_feature(enable = "avx512f")]
36444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36445#[cfg_attr(test, assert_instr(vmulsd))]
36446pub fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36447 unsafe {
36448 let mut add: f64 = 0.;
36449 if (k & 0b00000001) != 0 {
36450 let extracta: f64 = simd_extract!(a, 0);
36451 let extractb: f64 = simd_extract!(b, 0);
36452 add = extracta * extractb;
36453 }
36454 simd_insert!(a, 0, add)
36455 }
36456}
36457
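// Test-only sketch of the masked scalar multiply: lane 0 holds a[0] * b[0] when
// mask bit 0 is set, otherwise src[0] (writemask) or 0.0 (zeromask).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_mul_ss_sketch() {
    let src = _mm_set_ps(0., 0., 0., 5.0);
    let a = _mm_set_ps(0., 0., 0., 3.0);
    let b = _mm_set_ps(0., 0., 0., 4.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_mul_ss(src, 0b1, a, b)), 12.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_mul_ss(src, 0b0, a, b)), 5.0);
    assert_eq!(_mm_cvtss_f32(_mm_maskz_mul_ss(0b0, a, b)), 0.0);
}
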
36458/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36459///
36460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_ss&expand=2181)
36461#[inline]
36462#[target_feature(enable = "avx512f")]
36463#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36464#[cfg_attr(test, assert_instr(vdivss))]
36465pub fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36466 unsafe {
36467 let extractsrc: f32 = simd_extract!(src, 0);
36468 let mut add: f32 = extractsrc;
36469 if (k & 0b00000001) != 0 {
36470 let extracta: f32 = simd_extract!(a, 0);
36471 let extractb: f32 = simd_extract!(b, 0);
36472 add = extracta / extractb;
36473 }
36474 simd_insert!(a, 0, add)
36475 }
36476}
36477
36478/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36479///
36480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_ss&expand=2182)
36481#[inline]
36482#[target_feature(enable = "avx512f")]
36483#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36484#[cfg_attr(test, assert_instr(vdivss))]
36485pub fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36486 unsafe {
36487 let mut add: f32 = 0.;
36488 if (k & 0b00000001) != 0 {
36489 let extracta: f32 = simd_extract!(a, 0);
36490 let extractb: f32 = simd_extract!(b, 0);
36491 add = extracta / extractb;
36492 }
36493 simd_insert!(a, 0, add)
36494 }
36495}
36496
36497/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36498///
36499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_sd&expand=2178)
36500#[inline]
36501#[target_feature(enable = "avx512f")]
36502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36503#[cfg_attr(test, assert_instr(vdivsd))]
36504pub fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36505 unsafe {
36506 let extractsrc: f64 = simd_extract!(src, 0);
36507 let mut add: f64 = extractsrc;
36508 if (k & 0b00000001) != 0 {
36509 let extracta: f64 = simd_extract!(a, 0);
36510 let extractb: f64 = simd_extract!(b, 0);
36511 add = extracta / extractb;
36512 }
36513 simd_insert!(a, 0, add)
36514 }
36515}
36516
36517/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36518///
36519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_sd&expand=2179)
36520#[inline]
36521#[target_feature(enable = "avx512f")]
36522#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36523#[cfg_attr(test, assert_instr(vdivsd))]
36524pub fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36525 unsafe {
36526 let mut add: f64 = 0.;
36527 if (k & 0b00000001) != 0 {
36528 let extracta: f64 = simd_extract!(a, 0);
36529 let extractb: f64 = simd_extract!(b, 0);
36530 add = extracta / extractb;
36531 }
36532 simd_insert!(a, 0, add)
36533 }
36534}
36535
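// Test-only sketch of the masked scalar divide (double-precision variant):
// lane 0 holds a[0] / b[0] when mask bit 0 is set, otherwise src[0] or 0.0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_div_sd_sketch() {
    let src = _mm_set_pd(0., 9.0);
    let a = _mm_set_pd(0., 8.0);
    let b = _mm_set_pd(1., 2.0);
    assert_eq!(_mm_cvtsd_f64(_mm_mask_div_sd(src, 0b1, a, b)), 4.0);
    assert_eq!(_mm_cvtsd_f64(_mm_mask_div_sd(src, 0b0, a, b)), 9.0);
    assert_eq!(_mm_cvtsd_f64(_mm_maskz_div_sd(0b0, a, b)), 0.0);
}
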
36536/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36537///
36538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_ss&expand=3672)
36539#[inline]
36540#[target_feature(enable = "avx512f")]
36541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36542#[cfg_attr(test, assert_instr(vmaxss))]
36543pub fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36544 unsafe {
36545        transmute(vmaxss(
36546 a.as_f32x4(),
36547 b.as_f32x4(),
36548 src.as_f32x4(),
36549            k,
36550 _MM_FROUND_CUR_DIRECTION,
36551 ))
36552 }
36553}
36554
36555/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36556///
36557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_ss&expand=3673)
36558#[inline]
36559#[target_feature(enable = "avx512f")]
36560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36561#[cfg_attr(test, assert_instr(vmaxss))]
36562pub fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36563 unsafe {
36564        transmute(vmaxss(
36565 a.as_f32x4(),
36566 b.as_f32x4(),
36567            f32x4::ZERO,
36568            k,
36569 _MM_FROUND_CUR_DIRECTION,
36570 ))
36571 }
36572}
36573
36574/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36575///
36576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_sd&expand=3669)
36577#[inline]
36578#[target_feature(enable = "avx512f")]
36579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36580#[cfg_attr(test, assert_instr(vmaxsd))]
36581pub fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36582 unsafe {
36583        transmute(vmaxsd(
36584 a.as_f64x2(),
36585 b.as_f64x2(),
36586 src.as_f64x2(),
36587            k,
36588 _MM_FROUND_CUR_DIRECTION,
36589 ))
36590 }
36591}
36592
36593/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36594///
36595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_sd&expand=3670)
36596#[inline]
36597#[target_feature(enable = "avx512f")]
36598#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36599#[cfg_attr(test, assert_instr(vmaxsd))]
36600pub fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36601 unsafe {
36602        transmute(vmaxsd(
36603 a.as_f64x2(),
36604 b.as_f64x2(),
36605            f64x2::ZERO,
36606            k,
36607 _MM_FROUND_CUR_DIRECTION,
36608 ))
36609 }
36610}
36611
36612/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36613///
36614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_ss&expand=3786)
36615#[inline]
36616#[target_feature(enable = "avx512f")]
36617#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36618#[cfg_attr(test, assert_instr(vminss))]
36619pub fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36620 unsafe {
36621        transmute(vminss(
36622 a.as_f32x4(),
36623 b.as_f32x4(),
36624 src.as_f32x4(),
36625            k,
36626 _MM_FROUND_CUR_DIRECTION,
36627 ))
36628 }
36629}
36630
36631/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36632///
36633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_ss&expand=3787)
36634#[inline]
36635#[target_feature(enable = "avx512f")]
36636#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36637#[cfg_attr(test, assert_instr(vminss))]
36638pub fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36639 unsafe {
36640        transmute(vminss(
36641 a.as_f32x4(),
36642 b.as_f32x4(),
36643            f32x4::ZERO,
36644            k,
36645 _MM_FROUND_CUR_DIRECTION,
36646 ))
36647 }
36648}
36649
36650/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36651///
36652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_sd&expand=3783)
36653#[inline]
36654#[target_feature(enable = "avx512f")]
36655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36656#[cfg_attr(test, assert_instr(vminsd))]
36657pub fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36658 unsafe {
36659        transmute(vminsd(
36660 a.as_f64x2(),
36661 b.as_f64x2(),
36662 src.as_f64x2(),
36663            k,
36664 _MM_FROUND_CUR_DIRECTION,
36665 ))
36666 }
36667}
36668
36669/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36670///
36671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_sd&expand=3784)
36672#[inline]
36673#[target_feature(enable = "avx512f")]
36674#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36675#[cfg_attr(test, assert_instr(vminsd))]
36676pub fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36677 unsafe {
36678        transmute(vminsd(
36679 a.as_f64x2(),
36680 b.as_f64x2(),
36681            f64x2::ZERO,
36682            k,
36683 _MM_FROUND_CUR_DIRECTION,
36684 ))
36685 }
36686}
36687
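// Test-only sketch of the masked scalar max/min: the comparison always uses the
// low lanes of `a` and `b`; the mask only decides whether that result is kept.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_max_min_ss_sketch() {
    let src = _mm_set_ps(0., 0., 0., -1.0);
    let a = _mm_set_ps(0., 0., 0., 2.0);
    let b = _mm_set_ps(0., 0., 0., 5.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_max_ss(src, 0b1, a, b)), 5.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_min_ss(src, 0b1, a, b)), 2.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_max_ss(src, 0b0, a, b)), -1.0);
    assert_eq!(_mm_cvtss_f32(_mm_maskz_min_ss(0b0, a, b)), 0.0);
}
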
36688/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36689///
36690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_ss&expand=5387)
36691#[inline]
36692#[target_feature(enable = "avx512f")]
36693#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36694#[cfg_attr(test, assert_instr(vsqrtss))]
36695pub fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36696    unsafe { vsqrtss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
36697}
36698
36699/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36700///
36701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_ss&expand=5388)
36702#[inline]
36703#[target_feature(enable = "avx512f")]
36704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36705#[cfg_attr(test, assert_instr(vsqrtss))]
36706pub fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36707    unsafe { vsqrtss(a, b, _mm_setzero_ps(), k, _MM_FROUND_CUR_DIRECTION) }
36708}
36709
36710/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36711///
36712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_sd&expand=5384)
36713#[inline]
36714#[target_feature(enable = "avx512f")]
36715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36716#[cfg_attr(test, assert_instr(vsqrtsd))]
36717pub fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36718    unsafe { vsqrtsd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
36719}
36720
36721/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36722///
36723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_sd&expand=5385)
36724#[inline]
36725#[target_feature(enable = "avx512f")]
36726#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36727#[cfg_attr(test, assert_instr(vsqrtsd))]
36728pub fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36729    unsafe { vsqrtsd(a, b, _mm_setzero_pd(), k, _MM_FROUND_CUR_DIRECTION) }
36730}
36731
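// Test-only sketch: the square root is taken from the low lane of `b` (not `a`),
// and the mask gates whether it is written to lane 0 of the result.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_sqrt_ss_sketch() {
    let src = _mm_set_ps(0., 0., 0., 7.0);
    let a = _mm_set_ps(0., 0., 0., 1.0);
    let b = _mm_set_ps(0., 0., 0., 9.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_sqrt_ss(src, 0b1, a, b)), 3.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_sqrt_ss(src, 0b0, a, b)), 7.0);
    assert_eq!(_mm_cvtss_f32(_mm_maskz_sqrt_ss(0b0, a, b)), 0.0);
}
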
36732/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36733///
36734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_ss&expand=4825)
36735#[inline]
36736#[target_feature(enable = "avx512f")]
36737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36738#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36739pub fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 {
36740    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
36741}
36742
36743/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36744///
36745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_ss&expand=4823)
36746#[inline]
36747#[target_feature(enable = "avx512f")]
36748#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36749#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36750pub fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36751    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
36752}
36753
36754/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36755///
36756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_ss&expand=4824)
36757#[inline]
36758#[target_feature(enable = "avx512f")]
36759#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36760#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36761pub fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36762    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
36763}
36764
36765/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36766///
36767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_sd&expand=4822)
36768#[inline]
36769#[target_feature(enable = "avx512f")]
36770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36771#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36772pub fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d {
36773    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
36774}
36775
36776/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36777///
36778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_sd&expand=4820)
36779#[inline]
36780#[target_feature(enable = "avx512f")]
36781#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36782#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36783pub fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36784    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
36785}
36786
36787/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36788///
36789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_sd&expand=4821)
36790#[inline]
36791#[target_feature(enable = "avx512f")]
36792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36793#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36794pub fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36795    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
36796}
36797
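// Test-only sketch: `_mm_rsqrt14_ss` approximates 1/sqrt(b[0]) to within a
// relative error of 2^-14, so only a loose bound is checked here.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn rsqrt14_ss_sketch() {
    let a = _mm_set_ps(0., 0., 0., 1.0);
    let b = _mm_set_ps(0., 0., 0., 4.0);
    let r = _mm_cvtss_f32(_mm_rsqrt14_ss(a, b)); // ~= 1/sqrt(4.0) = 0.5
    assert!(r > 0.4999 && r < 0.5001);
}
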
36798/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36799///
36800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_ss&expand=4508)
36801#[inline]
36802#[target_feature(enable = "avx512f")]
36803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36804#[cfg_attr(test, assert_instr(vrcp14ss))]
36805pub fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 {
36806    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
36807}
36808
36809/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36810///
36811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_ss&expand=4506)
36812#[inline]
36813#[target_feature(enable = "avx512f")]
36814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36815#[cfg_attr(test, assert_instr(vrcp14ss))]
36816pub fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36817    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
36818}
36819
36820/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36821///
36822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_ss&expand=4507)
36823#[inline]
36824#[target_feature(enable = "avx512f")]
36825#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36826#[cfg_attr(test, assert_instr(vrcp14ss))]
36827pub fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36828    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
36829}
36830
36831/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36832///
36833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_sd&expand=4505)
36834#[inline]
36835#[target_feature(enable = "avx512f")]
36836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36837#[cfg_attr(test, assert_instr(vrcp14sd))]
36838pub fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d {
36839    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
36840}
36841
36842/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36843///
36844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_sd&expand=4503)
36845#[inline]
36846#[target_feature(enable = "avx512f")]
36847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36848#[cfg_attr(test, assert_instr(vrcp14sd))]
36849pub fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36850    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
36851}
36852
36853/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36854///
36855/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_sd&expand=4504)
36856#[inline]
36857#[target_feature(enable = "avx512f")]
36858#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36859#[cfg_attr(test, assert_instr(vrcp14sd))]
36860pub fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36861    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
36862}
36863
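// Test-only sketch: `_mm_rcp14_sd` approximates 1/b[0] to within 2^-14 relative
// error, while the upper lane is carried over from `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn rcp14_sd_sketch() {
    let a = _mm_set_pd(33.0, 1.0);
    let b = _mm_set_pd(0.0, 4.0);
    let r = _mm_rcp14_sd(a, b);
    let lo = _mm_cvtsd_f64(r); // ~= 1/4.0 = 0.25
    assert!(lo > 0.2499 && lo < 0.2501);
    let mut out = [0.0f64; 2];
    _mm_storeu_pd(out.as_mut_ptr(), r);
    assert_eq!(out[1], 33.0); // upper lane copied from `a`
}
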
36864/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36865///
36866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_ss&expand=2862)
36867#[inline]
36868#[target_feature(enable = "avx512f")]
36869#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36870#[cfg_attr(test, assert_instr(vgetexpss))]
36871pub fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 {
36872 unsafe {
36873        transmute(vgetexpss(
36874 a.as_f32x4(),
36875 b.as_f32x4(),
36876            f32x4::ZERO,
36877            0b1,
36878 _MM_FROUND_NO_EXC,
36879 ))
36880 }
36881}
36882
36883/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36884///
36885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_ss&expand=2863)
36886#[inline]
36887#[target_feature(enable = "avx512f")]
36888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36889#[cfg_attr(test, assert_instr(vgetexpss))]
36890pub fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36891 unsafe {
36892        transmute(vgetexpss(
36893 a.as_f32x4(),
36894 b.as_f32x4(),
36895 src.as_f32x4(),
36896            k,
36897 _MM_FROUND_NO_EXC,
36898 ))
36899 }
36900}
36901
36902/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36903///
36904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_ss&expand=2864)
36905#[inline]
36906#[target_feature(enable = "avx512f")]
36907#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36908#[cfg_attr(test, assert_instr(vgetexpss))]
36909pub fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36910 unsafe {
36911        transmute(vgetexpss(
36912 a.as_f32x4(),
36913 b.as_f32x4(),
36914            f32x4::ZERO,
36915            k,
36916 _MM_FROUND_NO_EXC,
36917 ))
36918 }
36919}
36920
36921/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36922///
36923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_sd&expand=2859)
36924#[inline]
36925#[target_feature(enable = "avx512f")]
36926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36927#[cfg_attr(test, assert_instr(vgetexpsd))]
36928pub fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d {
36929 unsafe {
36930        transmute(vgetexpsd(
36931 a.as_f64x2(),
36932 b.as_f64x2(),
36933            f64x2::ZERO,
36934            0b1,
36935 _MM_FROUND_NO_EXC,
36936 ))
36937 }
36938}
36939
36940/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36941///
36942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_sd&expand=2860)
36943#[inline]
36944#[target_feature(enable = "avx512f")]
36945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36946#[cfg_attr(test, assert_instr(vgetexpsd))]
36947pub fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36948 unsafe {
36949        transmute(vgetexpsd(
36950 a.as_f64x2(),
36951 b.as_f64x2(),
36952 src.as_f64x2(),
36953            k,
36954 _MM_FROUND_NO_EXC,
36955 ))
36956 }
36957}
36958
36959/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36960///
36961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_sd&expand=2861)
36962#[inline]
36963#[target_feature(enable = "avx512f")]
36964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36965#[cfg_attr(test, assert_instr(vgetexpsd))]
36966pub fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36967 unsafe {
36968        transmute(vgetexpsd(
36969 a.as_f64x2(),
36970 b.as_f64x2(),
36971            f64x2::ZERO,
36972            k,
36973 _MM_FROUND_NO_EXC,
36974 ))
36975 }
36976}
36977
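// Test-only sketch: getexp returns floor(log2(|b[0]|)) as a float in the low lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn getexp_ss_sketch() {
    let a = _mm_set_ps(0., 0., 0., 1.0);
    let b = _mm_set_ps(0., 0., 0., 8.0);
    // floor(log2(8.0)) = 3
    assert_eq!(_mm_cvtss_f32(_mm_getexp_ss(a, b)), 3.0);
    let b = _mm_set_ps(0., 0., 0., 0.75);
    // floor(log2(0.75)) = -1
    assert_eq!(_mm_cvtss_f32(_mm_getexp_ss(a, b)), -1.0);
}
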
36978/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
36979/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
36980/// _MM_MANT_NORM_1_2 // interval [1, 2)\
36981/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
36982/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
36983/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
36984/// The sign is determined by sc which can take the following values:\
36985/// _MM_MANT_SIGN_src // sign = sign(src)\
36986/// _MM_MANT_SIGN_zero // sign = 0\
36987/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
36988/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36989///
36990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_ss&expand=2898)
36991#[inline]
36992#[target_feature(enable = "avx512f")]
36993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36994#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
36995#[rustc_legacy_const_generics(2, 3)]
36996pub fn _mm_getmant_ss<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
36997 a: __m128,
36998 b: __m128,
36999) -> __m128 {
37000 unsafe {
37001 static_assert_uimm_bits!(NORM, 4);
37002 static_assert_uimm_bits!(SIGN, 2);
37003 let a: f32x4 = a.as_f32x4();
37004 let b: f32x4 = b.as_f32x4();
37005 let r: f32x4 = vgetmantss(
37006 a,
37007 b,
37008 SIGN << 2 | NORM,
37009            f32x4::ZERO,
37010            0b1,
37011 _MM_FROUND_CUR_DIRECTION,
37012 );
37013        transmute(r)
37014 }
37015}
37016
37017/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37018/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37019/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37020/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37021/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37022/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37023/// The sign is determined by sc which can take the following values:\
37024/// _MM_MANT_SIGN_src // sign = sign(src)\
37025/// _MM_MANT_SIGN_zero // sign = 0\
37026/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37027/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37028///
37029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_ss&expand=2899)
37030#[inline]
37031#[target_feature(enable = "avx512f")]
37032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37033#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
37034#[rustc_legacy_const_generics(4, 5)]
37035pub fn _mm_mask_getmant_ss<
37036 const NORM: _MM_MANTISSA_NORM_ENUM,
37037 const SIGN: _MM_MANTISSA_SIGN_ENUM,
37038>(
37039 src: __m128,
37040 k: __mmask8,
37041 a: __m128,
37042 b: __m128,
37043) -> __m128 {
37044 unsafe {
37045 static_assert_uimm_bits!(NORM, 4);
37046 static_assert_uimm_bits!(SIGN, 2);
37047 let a: f32x4 = a.as_f32x4();
37048 let b: f32x4 = b.as_f32x4();
37049 let src: f32x4 = src.as_f32x4();
37050        let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
37051        transmute(r)
37052 }
37053}
37054
37055/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37056/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37057/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37058/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37059/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37060/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37061/// The sign is determined by sc which can take the following values:\
37062/// _MM_MANT_SIGN_src // sign = sign(src)\
37063/// _MM_MANT_SIGN_zero // sign = 0\
37064/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37065/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37066///
37067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_ss&expand=2900)
37068#[inline]
37069#[target_feature(enable = "avx512f")]
37070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37071#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
37072#[rustc_legacy_const_generics(3, 4)]
37073pub fn _mm_maskz_getmant_ss<
37074 const NORM: _MM_MANTISSA_NORM_ENUM,
37075 const SIGN: _MM_MANTISSA_SIGN_ENUM,
37076>(
37077 k: __mmask8,
37078 a: __m128,
37079 b: __m128,
37080) -> __m128 {
37081 unsafe {
37082 static_assert_uimm_bits!(NORM, 4);
37083 static_assert_uimm_bits!(SIGN, 2);
37084 let a: f32x4 = a.as_f32x4();
37085 let b: f32x4 = b.as_f32x4();
37086 let r: f32x4 = vgetmantss(
37087 a,
37088 b,
37089 SIGN << 2 | NORM,
37090            f32x4::ZERO,
37091            k,
37092 _MM_FROUND_CUR_DIRECTION,
37093 );
37094        transmute(r)
37095 }
37096}
37097
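// Test-only sketch of the mantissa normalization described above: with
// _MM_MANT_NORM_1_2 the low lane of 12.0 (= 1.5 * 2^3) normalizes to 1.5, and
// with _MM_MANT_SIGN_src the sign follows the source operand. Helper name is
// illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn getmant_ss_sketch() {
    let a = _mm_set_ps(0., 0., 0., 1.0);
    let b = _mm_set_ps(0., 0., 0., 12.0);
    let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src>(a, b);
    assert_eq!(_mm_cvtss_f32(r), 1.5);
}
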
37098/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37099/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37100/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37101/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37102/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37103/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37104/// The sign is determined by sc which can take the following values:\
37105/// _MM_MANT_SIGN_src // sign = sign(src)\
37106/// _MM_MANT_SIGN_zero // sign = 0\
37107/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37108/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37109///
37110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_sd&expand=2895)
37111#[inline]
37112#[target_feature(enable = "avx512f")]
37113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37114#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37115#[rustc_legacy_const_generics(2, 3)]
37116pub fn _mm_getmant_sd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
37117 a: __m128d,
37118 b: __m128d,
37119) -> __m128d {
37120 unsafe {
37121 static_assert_uimm_bits!(NORM, 4);
37122 static_assert_uimm_bits!(SIGN, 2);
37123 let a: f64x2 = a.as_f64x2();
37124 let b: f64x2 = b.as_f64x2();
37125 let r: f64x2 = vgetmantsd(
37126 a,
37127 b,
37128 SIGN << 2 | NORM,
37129            f64x2::ZERO,
37130            0b1,
37131 _MM_FROUND_CUR_DIRECTION,
37132 );
37133        transmute(r)
37134 }
37135}
37136
37137/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37138/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37139/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37140/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37141/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37142/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37143/// The sign is determined by sc which can take the following values:\
37144/// _MM_MANT_SIGN_src // sign = sign(src)\
37145/// _MM_MANT_SIGN_zero // sign = 0\
37146/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37147/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37148///
37149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_sd&expand=2896)
37150#[inline]
37151#[target_feature(enable = "avx512f")]
37152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37153#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37154#[rustc_legacy_const_generics(4, 5)]
37155pub fn _mm_mask_getmant_sd<
37156 const NORM: _MM_MANTISSA_NORM_ENUM,
37157 const SIGN: _MM_MANTISSA_SIGN_ENUM,
37158>(
37159 src: __m128d,
37160 k: __mmask8,
37161 a: __m128d,
37162 b: __m128d,
37163) -> __m128d {
37164 unsafe {
37165 static_assert_uimm_bits!(NORM, 4);
37166 static_assert_uimm_bits!(SIGN, 2);
37167 let a: f64x2 = a.as_f64x2();
37168 let b: f64x2 = b.as_f64x2();
37169 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
37172 }
37173}
37174
37175/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37176/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37177/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37178/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37179/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37180/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37181/// The sign is determined by sc which can take the following values:\
37182/// _MM_MANT_SIGN_src // sign = sign(src)\
37183/// _MM_MANT_SIGN_zero // sign = 0\
37184/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37185/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37186///
37187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_sd&expand=2897)
37188#[inline]
37189#[target_feature(enable = "avx512f")]
37190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37191#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37192#[rustc_legacy_const_generics(3, 4)]
37193pub fn _mm_maskz_getmant_sd<
37194 const NORM: _MM_MANTISSA_NORM_ENUM,
37195 const SIGN: _MM_MANTISSA_SIGN_ENUM,
37196>(
37197 k: __mmask8,
37198 a: __m128d,
37199 b: __m128d,
37200) -> __m128d {
37201 unsafe {
37202 static_assert_uimm_bits!(NORM, 4);
37203 static_assert_uimm_bits!(SIGN, 2);
37204 let a: f64x2 = a.as_f64x2();
37205 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetmantsd(
            a,
            b,
            SIGN << 2 | NORM,
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
37215 }
37216}
37217
37218/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37219/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37220/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37221/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37222/// * [`_MM_FROUND_TO_POS_INF`] : round up
37223/// * [`_MM_FROUND_TO_ZERO`] : truncate
37224/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37225///
37226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_ss&expand=4802)
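///
/// # Examples
///
/// A minimal usage sketch, not part of Intel's description: it checks for `avx512f`
/// at runtime; the `IMM8` values assume the "number of fraction bits" field sits in
/// bits 7:4 of the immediate, and the inputs are purely illustrative.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512f") {
///         // `unsafe` is required because this context does not itself enable `avx512f`.
///         unsafe {
///             let a = _mm_set_ss(0.0);
///             let b = _mm_set_ss(2.7);
///             // IMM8 = 0: keep 0 fraction bits, i.e. round to the nearest integer.
///             let r = _mm_roundscale_ss::<0>(a, b);
///             assert_eq!(_mm_cvtss_f32(r), 3.0);
///             // IMM8 = 0x10: keep 1 fraction bit, i.e. round to the nearest multiple of 0.5.
///             let r = _mm_roundscale_ss::<0x10>(a, b);
///             assert_eq!(_mm_cvtss_f32(r), 2.5);
///         }
///     }
/// }
/// ```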
37227#[inline]
37228#[target_feature(enable = "avx512f")]
37229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37230#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 255))]
37231#[rustc_legacy_const_generics(2)]
37232pub fn _mm_roundscale_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
37233 unsafe {
37234 static_assert_uimm_bits!(IMM8, 8);
37235 let a: f32x4 = a.as_f32x4();
37236 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vrndscaless(
            a,
            b,
            f32x4::ZERO,
            0b11111111,
            IMM8,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
37246 }
37247}
37248
37249/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37250/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37251/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37252/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37253/// * [`_MM_FROUND_TO_POS_INF`] : round up
37254/// * [`_MM_FROUND_TO_ZERO`] : truncate
37255/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37256///
37257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_ss&expand=4800)
37258#[inline]
37259#[target_feature(enable = "avx512f")]
37260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37261#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
37262#[rustc_legacy_const_generics(4)]
37263pub fn _mm_mask_roundscale_ss<const IMM8: i32>(
37264 src: __m128,
37265 k: __mmask8,
37266 a: __m128,
37267 b: __m128,
37268) -> __m128 {
37269 unsafe {
37270 static_assert_uimm_bits!(IMM8, 8);
37271 let a: f32x4 = a.as_f32x4();
37272 let b: f32x4 = b.as_f32x4();
37273 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vrndscaless(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
37276 }
37277}
37278
37279/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37280/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37281/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37282/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37283/// * [`_MM_FROUND_TO_POS_INF`] : round up
37284/// * [`_MM_FROUND_TO_ZERO`] : truncate
37285/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37286///
37287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_ss&expand=4801)
37288#[inline]
37289#[target_feature(enable = "avx512f")]
37290#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37291#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
37292#[rustc_legacy_const_generics(3)]
37293pub fn _mm_maskz_roundscale_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37294 unsafe {
37295 static_assert_uimm_bits!(IMM8, 8);
37296 let a: f32x4 = a.as_f32x4();
37297 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vrndscaless(a, b, f32x4::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
37300 }
37301}
37302
37303/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37304/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37305/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37306/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37307/// * [`_MM_FROUND_TO_POS_INF`] : round up
37308/// * [`_MM_FROUND_TO_ZERO`] : truncate
37309/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37310///
37311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_sd&expand=4799)
37312#[inline]
37313#[target_feature(enable = "avx512f")]
37314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37315#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 255))]
37316#[rustc_legacy_const_generics(2)]
37317pub fn _mm_roundscale_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
37318 unsafe {
37319 static_assert_uimm_bits!(IMM8, 8);
37320 let a: f64x2 = a.as_f64x2();
37321 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vrndscalesd(
            a,
            b,
            f64x2::ZERO,
            0b11111111,
            IMM8,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
37331 }
37332}
37333
37334/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37335/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37336/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37337/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37338/// * [`_MM_FROUND_TO_POS_INF`] : round up
37339/// * [`_MM_FROUND_TO_ZERO`] : truncate
37340/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37341///
37342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_sd&expand=4797)
37343#[inline]
37344#[target_feature(enable = "avx512f")]
37345#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37346#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
37347#[rustc_legacy_const_generics(4)]
37348pub fn _mm_mask_roundscale_sd<const IMM8: i32>(
37349 src: __m128d,
37350 k: __mmask8,
37351 a: __m128d,
37352 b: __m128d,
37353) -> __m128d {
37354 unsafe {
37355 static_assert_uimm_bits!(IMM8, 8);
37356 let a: f64x2 = a.as_f64x2();
37357 let b: f64x2 = b.as_f64x2();
37358 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vrndscalesd(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
37361 }
37362}
37363
37364/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37365/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37366/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37367/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37368/// * [`_MM_FROUND_TO_POS_INF`] : round up
37369/// * [`_MM_FROUND_TO_ZERO`] : truncate
37370/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37371///
37372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_sd&expand=4798)
37373#[inline]
37374#[target_feature(enable = "avx512f")]
37375#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37376#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
37377#[rustc_legacy_const_generics(3)]
37378pub fn _mm_maskz_roundscale_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37379 unsafe {
37380 static_assert_uimm_bits!(IMM8, 8);
37381 let a: f64x2 = a.as_f64x2();
37382 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
37385 }
37386}
37387
37388/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
37389///
37390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_ss&expand=4901)
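///
/// # Examples
///
/// A minimal usage sketch, not part of Intel's description: the lower lane of the
/// result is `a * 2^floor(b)`; the inputs are illustrative and `avx512f` support is
/// checked at runtime.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512f") {
///         // `unsafe` is required because this context does not itself enable `avx512f`.
///         unsafe {
///             let a = _mm_set_ss(3.0);
///             let b = _mm_set_ss(4.0);
///             // Lower lane: 3.0 * 2^4 = 48.0; the upper lanes come from `a`.
///             let r = _mm_scalef_ss(a, b);
///             assert_eq!(_mm_cvtss_f32(r), 48.0);
///         }
///     }
/// }
/// ```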
37391#[inline]
37392#[target_feature(enable = "avx512f")]
37393#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37394#[cfg_attr(test, assert_instr(vscalefss))]
37395pub fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 {
37396 unsafe {
37397 let a: f32x4 = a.as_f32x4();
37398 let b: f32x4 = b.as_f32x4();
        transmute(vscalefss(
            a,
            b,
            f32x4::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
37406 }
37407}
37408
37409/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37410///
37411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_ss&expand=4899)
37412#[inline]
37413#[target_feature(enable = "avx512f")]
37414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37415#[cfg_attr(test, assert_instr(vscalefss))]
37416pub fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
37417 unsafe {
37418 let a: f32x4 = a.as_f32x4();
37419 let b: f32x4 = b.as_f32x4();
37420 let src: f32x4 = src.as_f32x4();
        transmute(vscalefss(a, b, src, k, _MM_FROUND_CUR_DIRECTION))
37422 }
37423}
37424
37425/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37426///
37427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_ss&expand=4900)
37428#[inline]
37429#[target_feature(enable = "avx512f")]
37430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37431#[cfg_attr(test, assert_instr(vscalefss))]
37432pub fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37433 unsafe {
        transmute(vscalefss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
37441 }
37442}
37443
37444/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
37445///
37446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_sd&expand=4898)
37447#[inline]
37448#[target_feature(enable = "avx512f")]
37449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37450#[cfg_attr(test, assert_instr(vscalefsd))]
37451pub fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d {
37452 unsafe {
        transmute(vscalefsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
37460 }
37461}
37462
37463/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37464///
37465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_sd&expand=4896)
37466#[inline]
37467#[target_feature(enable = "avx512f")]
37468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37469#[cfg_attr(test, assert_instr(vscalefsd))]
37470pub fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37471 unsafe {
        transmute(vscalefsd(
            a.as_f64x2(),
            b.as_f64x2(),
            src.as_f64x2(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
37479 }
37480}
37481
37482/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37483///
37484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_sd&expand=4897)
37485#[inline]
37486#[target_feature(enable = "avx512f")]
37487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37488#[cfg_attr(test, assert_instr(vscalefsd))]
37489pub fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37490 unsafe {
        transmute(vscalefsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
37498 }
37499}
37500
37501/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37502///
37503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_ss&expand=2582)
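///
/// # Examples
///
/// A minimal usage sketch, not part of Intel's description: it exercises both mask
/// states with illustrative values, assuming `avx512f` is available at runtime.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512f") {
///         // `unsafe` is required because this context does not itself enable `avx512f`.
///         unsafe {
///             let a = _mm_set_ss(2.0);
///             let b = _mm_set_ss(3.0);
///             let c = _mm_set_ss(4.0);
///             // Mask bit 0 set: lower lane = 2.0 * 3.0 + 4.0 = 10.0.
///             let r = _mm_mask_fmadd_ss(a, 0b1, b, c);
///             assert_eq!(_mm_cvtss_f32(r), 10.0);
///             // Mask bit 0 clear: the lower lane of `a` passes through unchanged.
///             let r = _mm_mask_fmadd_ss(a, 0b0, b, c);
///             assert_eq!(_mm_cvtss_f32(r), 2.0);
///         }
///     }
/// }
/// ```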
37504#[inline]
37505#[target_feature(enable = "avx512f")]
37506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37507#[cfg_attr(test, assert_instr(vfmadd))]
37508pub fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37509 unsafe {
37510 let mut fmadd: f32 = simd_extract!(a, 0);
37511 if (k & 0b00000001) != 0 {
37512 let extractb: f32 = simd_extract!(b, 0);
37513 let extractc: f32 = simd_extract!(c, 0);
            fmadd = fmaf32(fmadd, extractb, extractc);
37515 }
37516 simd_insert!(a, 0, fmadd)
37517 }
37518}
37519
37520/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37521///
37522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_ss&expand=2584)
37523#[inline]
37524#[target_feature(enable = "avx512f")]
37525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37526#[cfg_attr(test, assert_instr(vfmadd))]
37527pub fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37528 unsafe {
37529 let mut fmadd: f32 = 0.;
37530 if (k & 0b00000001) != 0 {
37531 let extracta: f32 = simd_extract!(a, 0);
37532 let extractb: f32 = simd_extract!(b, 0);
37533 let extractc: f32 = simd_extract!(c, 0);
            fmadd = fmaf32(extracta, extractb, extractc);
37535 }
37536 simd_insert!(a, 0, fmadd)
37537 }
37538}
37539
37540/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37541///
37542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_ss&expand=2583)
37543#[inline]
37544#[target_feature(enable = "avx512f")]
37545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37546#[cfg_attr(test, assert_instr(vfmadd))]
37547pub fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37548 unsafe {
37549 let mut fmadd: f32 = simd_extract!(c, 0);
37550 if (k & 0b00000001) != 0 {
37551 let extracta: f32 = simd_extract!(a, 0);
37552 let extractb: f32 = simd_extract!(b, 0);
            fmadd = fmaf32(extracta, extractb, fmadd);
37554 }
37555 simd_insert!(c, 0, fmadd)
37556 }
37557}
37558
37559/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37560///
37561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_sd&expand=2578)
37562#[inline]
37563#[target_feature(enable = "avx512f")]
37564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37565#[cfg_attr(test, assert_instr(vfmadd))]
37566pub fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37567 unsafe {
37568 let mut fmadd: f64 = simd_extract!(a, 0);
37569 if (k & 0b00000001) != 0 {
37570 let extractb: f64 = simd_extract!(b, 0);
37571 let extractc: f64 = simd_extract!(c, 0);
            fmadd = fmaf64(fmadd, extractb, extractc);
37573 }
37574 simd_insert!(a, 0, fmadd)
37575 }
37576}
37577
37578/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37579///
37580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_sd&expand=2580)
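///
/// # Examples
///
/// A minimal usage sketch, not part of Intel's description: it shows the zeroing
/// behaviour of the mask with illustrative values, assuming `avx512f` at runtime.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512f") {
///         // `unsafe` is required because this context does not itself enable `avx512f`.
///         unsafe {
///             let a = _mm_set_sd(2.0);
///             let b = _mm_set_sd(3.0);
///             let c = _mm_set_sd(4.0);
///             // Mask bit 0 set: lower lane = 2.0 * 3.0 + 4.0 = 10.0.
///             let r = _mm_maskz_fmadd_sd(0b1, a, b, c);
///             assert_eq!(_mm_cvtsd_f64(r), 10.0);
///             // Mask bit 0 clear: the lower lane is zeroed.
///             let r = _mm_maskz_fmadd_sd(0b0, a, b, c);
///             assert_eq!(_mm_cvtsd_f64(r), 0.0);
///         }
///     }
/// }
/// ```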
37581#[inline]
37582#[target_feature(enable = "avx512f")]
37583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37584#[cfg_attr(test, assert_instr(vfmadd))]
37585pub fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37586 unsafe {
37587 let mut fmadd: f64 = 0.;
37588 if (k & 0b00000001) != 0 {
37589 let extracta: f64 = simd_extract!(a, 0);
37590 let extractb: f64 = simd_extract!(b, 0);
37591 let extractc: f64 = simd_extract!(c, 0);
            fmadd = fmaf64(extracta, extractb, extractc);
37593 }
37594 simd_insert!(a, 0, fmadd)
37595 }
37596}
37597
37598/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37599///
37600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_sd&expand=2579)
37601#[inline]
37602#[target_feature(enable = "avx512f")]
37603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37604#[cfg_attr(test, assert_instr(vfmadd))]
37605pub fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37606 unsafe {
37607 let mut fmadd: f64 = simd_extract!(c, 0);
37608 if (k & 0b00000001) != 0 {
37609 let extracta: f64 = simd_extract!(a, 0);
37610 let extractb: f64 = simd_extract!(b, 0);
            fmadd = fmaf64(extracta, extractb, fmadd);
37612 }
37613 simd_insert!(c, 0, fmadd)
37614 }
37615}
37616
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37618///
37619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_ss&expand=2668)
37620#[inline]
37621#[target_feature(enable = "avx512f")]
37622#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37623#[cfg_attr(test, assert_instr(vfmsub))]
37624pub fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37625 unsafe {
37626 let mut fmsub: f32 = simd_extract!(a, 0);
37627 if (k & 0b00000001) != 0 {
37628 let extractb: f32 = simd_extract!(b, 0);
37629 let extractc: f32 = simd_extract!(c, 0);
37630 let extractc: f32 = -extractc;
            fmsub = fmaf32(fmsub, extractb, extractc);
37632 }
37633 simd_insert!(a, 0, fmsub)
37634 }
37635}
37636
37637/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37638///
37639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_ss&expand=2670)
37640#[inline]
37641#[target_feature(enable = "avx512f")]
37642#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37643#[cfg_attr(test, assert_instr(vfmsub))]
37644pub fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37645 unsafe {
37646 let mut fmsub: f32 = 0.;
37647 if (k & 0b00000001) != 0 {
37648 let extracta: f32 = simd_extract!(a, 0);
37649 let extractb: f32 = simd_extract!(b, 0);
37650 let extractc: f32 = simd_extract!(c, 0);
37651 let extractc: f32 = -extractc;
            fmsub = fmaf32(extracta, extractb, extractc);
37653 }
37654 simd_insert!(a, 0, fmsub)
37655 }
37656}
37657
37658/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37659///
37660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_ss&expand=2669)
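///
/// # Examples
///
/// A minimal usage sketch, not part of Intel's description: it shows that the
/// `mask3` form builds its result on `c`, with illustrative values and a runtime
/// `avx512f` check.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512f") {
///         // `unsafe` is required because this context does not itself enable `avx512f`.
///         unsafe {
///             let a = _mm_set_ss(2.0);
///             let b = _mm_set_ss(3.0);
///             let c = _mm_set_ss(4.0);
///             // Mask bit 0 set: lower lane = 2.0 * 3.0 - 4.0 = 2.0.
///             let r = _mm_mask3_fmsub_ss(a, b, c, 0b1);
///             assert_eq!(_mm_cvtss_f32(r), 2.0);
///             // Mask bit 0 clear: the lower lane of `c` passes through unchanged.
///             let r = _mm_mask3_fmsub_ss(a, b, c, 0b0);
///             assert_eq!(_mm_cvtss_f32(r), 4.0);
///         }
///     }
/// }
/// ```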
37661#[inline]
37662#[target_feature(enable = "avx512f")]
37663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37664#[cfg_attr(test, assert_instr(vfmsub))]
37665pub fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37666 unsafe {
37667 let mut fmsub: f32 = simd_extract!(c, 0);
37668 if (k & 0b00000001) != 0 {
37669 let extracta: f32 = simd_extract!(a, 0);
37670 let extractb: f32 = simd_extract!(b, 0);
37671 let extractc: f32 = -fmsub;
            fmsub = fmaf32(extracta, extractb, extractc);
37673 }
37674 simd_insert!(c, 0, fmsub)
37675 }
37676}
37677
37678/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37679///
37680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_sd&expand=2664)
37681#[inline]
37682#[target_feature(enable = "avx512f")]
37683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37684#[cfg_attr(test, assert_instr(vfmsub))]
37685pub fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37686 unsafe {
37687 let mut fmsub: f64 = simd_extract!(a, 0);
37688 if (k & 0b00000001) != 0 {
37689 let extractb: f64 = simd_extract!(b, 0);
37690 let extractc: f64 = simd_extract!(c, 0);
37691 let extractc: f64 = -extractc;
            fmsub = fmaf64(fmsub, extractb, extractc);
37693 }
37694 simd_insert!(a, 0, fmsub)
37695 }
37696}
37697
37698/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37699///
37700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_sd&expand=2666)
37701#[inline]
37702#[target_feature(enable = "avx512f")]
37703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37704#[cfg_attr(test, assert_instr(vfmsub))]
37705pub fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37706 unsafe {
37707 let mut fmsub: f64 = 0.;
37708 if (k & 0b00000001) != 0 {
37709 let extracta: f64 = simd_extract!(a, 0);
37710 let extractb: f64 = simd_extract!(b, 0);
37711 let extractc: f64 = simd_extract!(c, 0);
37712 let extractc: f64 = -extractc;
            fmsub = fmaf64(extracta, extractb, extractc);
37714 }
37715 simd_insert!(a, 0, fmsub)
37716 }
37717}
37718
37719/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37720///
37721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_sd&expand=2665)
37722#[inline]
37723#[target_feature(enable = "avx512f")]
37724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37725#[cfg_attr(test, assert_instr(vfmsub))]
37726pub fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37727 unsafe {
37728 let mut fmsub: f64 = simd_extract!(c, 0);
37729 if (k & 0b00000001) != 0 {
37730 let extracta: f64 = simd_extract!(a, 0);
37731 let extractb: f64 = simd_extract!(b, 0);
37732 let extractc: f64 = -fmsub;
            fmsub = fmaf64(extracta, extractb, extractc);
37734 }
37735 simd_insert!(c, 0, fmsub)
37736 }
37737}
37738
37739/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37740///
37741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_ss&expand=2748)
37742#[inline]
37743#[target_feature(enable = "avx512f")]
37744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37745#[cfg_attr(test, assert_instr(vfnmadd))]
37746pub fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37747 unsafe {
37748 let mut fnmadd: f32 = simd_extract!(a, 0);
37749 if (k & 0b00000001) != 0 {
37750 let extracta: f32 = -fnmadd;
37751 let extractb: f32 = simd_extract!(b, 0);
37752 let extractc: f32 = simd_extract!(c, 0);
            fnmadd = fmaf32(extracta, extractb, extractc);
37754 }
37755 simd_insert!(a, 0, fnmadd)
37756 }
37757}
37758
37759/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37760///
37761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_ss&expand=2750)
37762#[inline]
37763#[target_feature(enable = "avx512f")]
37764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37765#[cfg_attr(test, assert_instr(vfnmadd))]
37766pub fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37767 unsafe {
37768 let mut fnmadd: f32 = 0.;
37769 if (k & 0b00000001) != 0 {
37770 let extracta: f32 = simd_extract!(a, 0);
37771 let extracta: f32 = -extracta;
37772 let extractb: f32 = simd_extract!(b, 0);
37773 let extractc: f32 = simd_extract!(c, 0);
            fnmadd = fmaf32(extracta, extractb, extractc);
37775 }
37776 simd_insert!(a, 0, fnmadd)
37777 }
37778}
37779
37780/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37781///
37782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_ss&expand=2749)
37783#[inline]
37784#[target_feature(enable = "avx512f")]
37785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37786#[cfg_attr(test, assert_instr(vfnmadd))]
37787pub fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37788 unsafe {
37789 let mut fnmadd: f32 = simd_extract!(c, 0);
37790 if (k & 0b00000001) != 0 {
37791 let extracta: f32 = simd_extract!(a, 0);
37792 let extracta: f32 = -extracta;
37793 let extractb: f32 = simd_extract!(b, 0);
            fnmadd = fmaf32(extracta, extractb, fnmadd);
37795 }
37796 simd_insert!(c, 0, fnmadd)
37797 }
37798}
37799
37800/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37801///
37802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_sd&expand=2744)
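///
/// # Examples
///
/// A minimal usage sketch, not part of Intel's description: it shows the negated
/// multiply-add on the lower lane with illustrative values, assuming `avx512f` at
/// runtime.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512f") {
///         // `unsafe` is required because this context does not itself enable `avx512f`.
///         unsafe {
///             let a = _mm_set_sd(2.0);
///             let b = _mm_set_sd(3.0);
///             let c = _mm_set_sd(10.0);
///             // Mask bit 0 set: lower lane = -(2.0 * 3.0) + 10.0 = 4.0.
///             let r = _mm_mask_fnmadd_sd(a, 0b1, b, c);
///             assert_eq!(_mm_cvtsd_f64(r), 4.0);
///             // Mask bit 0 clear: the lower lane of `a` passes through unchanged.
///             let r = _mm_mask_fnmadd_sd(a, 0b0, b, c);
///             assert_eq!(_mm_cvtsd_f64(r), 2.0);
///         }
///     }
/// }
/// ```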
37803#[inline]
37804#[target_feature(enable = "avx512f")]
37805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37806#[cfg_attr(test, assert_instr(vfnmadd))]
37807pub fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37808 unsafe {
37809 let mut fnmadd: f64 = simd_extract!(a, 0);
37810 if (k & 0b00000001) != 0 {
37811 let extracta: f64 = -fnmadd;
37812 let extractb: f64 = simd_extract!(b, 0);
37813 let extractc: f64 = simd_extract!(c, 0);
            fnmadd = fmaf64(extracta, extractb, extractc);
37815 }
37816 simd_insert!(a, 0, fnmadd)
37817 }
37818}
37819
37820/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37821///
37822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_sd&expand=2746)
37823#[inline]
37824#[target_feature(enable = "avx512f")]
37825#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37826#[cfg_attr(test, assert_instr(vfnmadd))]
37827pub fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37828 unsafe {
37829 let mut fnmadd: f64 = 0.;
37830 if (k & 0b00000001) != 0 {
37831 let extracta: f64 = simd_extract!(a, 0);
37832 let extracta: f64 = -extracta;
37833 let extractb: f64 = simd_extract!(b, 0);
37834 let extractc: f64 = simd_extract!(c, 0);
            fnmadd = fmaf64(extracta, extractb, extractc);
37836 }
37837 simd_insert!(a, 0, fnmadd)
37838 }
37839}
37840
37841/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37842///
37843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_sd&expand=2745)
37844#[inline]
37845#[target_feature(enable = "avx512f")]
37846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37847#[cfg_attr(test, assert_instr(vfnmadd))]
37848pub fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37849 unsafe {
37850 let mut fnmadd: f64 = simd_extract!(c, 0);
37851 if (k & 0b00000001) != 0 {
37852 let extracta: f64 = simd_extract!(a, 0);
37853 let extracta: f64 = -extracta;
37854 let extractb: f64 = simd_extract!(b, 0);
            fnmadd = fmaf64(extracta, extractb, fnmadd);
37856 }
37857 simd_insert!(c, 0, fnmadd)
37858 }
37859}
37860
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37862///
37863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_ss&expand=2796)
37864#[inline]
37865#[target_feature(enable = "avx512f")]
37866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37867#[cfg_attr(test, assert_instr(vfnmsub))]
37868pub fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37869 unsafe {
37870 let mut fnmsub: f32 = simd_extract!(a, 0);
37871 if (k & 0b00000001) != 0 {
37872 let extracta: f32 = -fnmsub;
37873 let extractb: f32 = simd_extract!(b, 0);
37874 let extractc: f32 = simd_extract!(c, 0);
37875 let extractc: f32 = -extractc;
            fnmsub = fmaf32(extracta, extractb, extractc);
37877 }
37878 simd_insert!(a, 0, fnmsub)
37879 }
37880}
37881
37882/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37883///
37884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_ss&expand=2798)
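///
/// # Examples
///
/// A minimal usage sketch, not part of Intel's description: it shows the negated
/// multiply-subtract combined with the zeroing mask, with illustrative values and a
/// runtime `avx512f` check.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512f") {
///         // `unsafe` is required because this context does not itself enable `avx512f`.
///         unsafe {
///             let a = _mm_set_ss(2.0);
///             let b = _mm_set_ss(3.0);
///             let c = _mm_set_ss(4.0);
///             // Mask bit 0 set: lower lane = -(2.0 * 3.0) - 4.0 = -10.0.
///             let r = _mm_maskz_fnmsub_ss(0b1, a, b, c);
///             assert_eq!(_mm_cvtss_f32(r), -10.0);
///             // Mask bit 0 clear: the lower lane is zeroed.
///             let r = _mm_maskz_fnmsub_ss(0b0, a, b, c);
///             assert_eq!(_mm_cvtss_f32(r), 0.0);
///         }
///     }
/// }
/// ```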
37885#[inline]
37886#[target_feature(enable = "avx512f")]
37887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37888#[cfg_attr(test, assert_instr(vfnmsub))]
37889pub fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37890 unsafe {
37891 let mut fnmsub: f32 = 0.;
37892 if (k & 0b00000001) != 0 {
37893 let extracta: f32 = simd_extract!(a, 0);
37894 let extracta: f32 = -extracta;
37895 let extractb: f32 = simd_extract!(b, 0);
37896 let extractc: f32 = simd_extract!(c, 0);
37897 let extractc: f32 = -extractc;
            fnmsub = fmaf32(extracta, extractb, extractc);
37899 }
37900 simd_insert!(a, 0, fnmsub)
37901 }
37902}
37903
37904/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37905///
37906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_ss&expand=2797)
37907#[inline]
37908#[target_feature(enable = "avx512f")]
37909#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37910#[cfg_attr(test, assert_instr(vfnmsub))]
37911pub fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37912 unsafe {
37913 let mut fnmsub: f32 = simd_extract!(c, 0);
37914 if (k & 0b00000001) != 0 {
37915 let extracta: f32 = simd_extract!(a, 0);
37916 let extracta: f32 = -extracta;
37917 let extractb: f32 = simd_extract!(b, 0);
37918 let extractc: f32 = -fnmsub;
            fnmsub = fmaf32(extracta, extractb, extractc);
37920 }
37921 simd_insert!(c, 0, fnmsub)
37922 }
37923}
37924
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37926///
37927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_sd&expand=2792)
37928#[inline]
37929#[target_feature(enable = "avx512f")]
37930#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37931#[cfg_attr(test, assert_instr(vfnmsub))]
37932pub fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37933 unsafe {
37934 let mut fnmsub: f64 = simd_extract!(a, 0);
37935 if (k & 0b00000001) != 0 {
37936 let extracta: f64 = -fnmsub;
37937 let extractb: f64 = simd_extract!(b, 0);
37938 let extractc: f64 = simd_extract!(c, 0);
37939 let extractc: f64 = -extractc;
            fnmsub = fmaf64(extracta, extractb, extractc);
37941 }
37942 simd_insert!(a, 0, fnmsub)
37943 }
37944}
37945
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37947///
37948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_sd&expand=2794)
37949#[inline]
37950#[target_feature(enable = "avx512f")]
37951#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37952#[cfg_attr(test, assert_instr(vfnmsub))]
37953pub fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37954 unsafe {
37955 let mut fnmsub: f64 = 0.;
37956 if (k & 0b00000001) != 0 {
37957 let extracta: f64 = simd_extract!(a, 0);
37958 let extracta: f64 = -extracta;
37959 let extractb: f64 = simd_extract!(b, 0);
37960 let extractc: f64 = simd_extract!(c, 0);
37961 let extractc: f64 = -extractc;
            fnmsub = fmaf64(extracta, extractb, extractc);
37963 }
37964 simd_insert!(a, 0, fnmsub)
37965 }
37966}
37967
37968/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37969///
37970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_sd&expand=2793)
37971#[inline]
37972#[target_feature(enable = "avx512f")]
37973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37974#[cfg_attr(test, assert_instr(vfnmsub))]
37975pub fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37976 unsafe {
37977 let mut fnmsub: f64 = simd_extract!(c, 0);
37978 if (k & 0b00000001) != 0 {
37979 let extracta: f64 = simd_extract!(a, 0);
37980 let extracta: f64 = -extracta;
37981 let extractb: f64 = simd_extract!(b, 0);
37982 let extractc: f64 = -fnmsub;
            fnmsub = fmaf64(extracta, extractb, extractc);
37984 }
37985 simd_insert!(c, 0, fnmsub)
37986 }
37987}
37988
37989/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37990///
37991/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37992/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37993/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37994/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37995/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37996/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37997///
37998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_ss&expand=151)
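///
/// # Examples
///
/// A minimal usage sketch, not part of Intel's description: it uses round-to-nearest
/// with exception suppression and illustrative values, assuming `avx512f` at runtime.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512f") {
///         // `unsafe` is required because this context does not itself enable `avx512f`.
///         unsafe {
///             const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
///             let a = _mm_set_ss(1.5);
///             let b = _mm_set_ss(2.25);
///             // Lower lane: 1.5 + 2.25 = 3.75; the upper lanes come from `a`.
///             let r = _mm_add_round_ss::<R>(a, b);
///             assert_eq!(_mm_cvtss_f32(r), 3.75);
///         }
///     }
/// }
/// ```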
37999#[inline]
38000#[target_feature(enable = "avx512f")]
38001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38002#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
38003#[rustc_legacy_const_generics(2)]
38004pub fn _mm_add_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38005 unsafe {
38006 static_assert_rounding!(ROUNDING);
38007 let a: f32x4 = a.as_f32x4();
38008 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vaddss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
38011 }
38012}
38013
38014/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38015///
38016/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38017/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38018/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38019/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38020/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38021/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38022///
38023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_ss&expand=152)
38024#[inline]
38025#[target_feature(enable = "avx512f")]
38026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38027#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
38028#[rustc_legacy_const_generics(4)]
38029pub fn _mm_mask_add_round_ss<const ROUNDING: i32>(
38030 src: __m128,
38031 k: __mmask8,
38032 a: __m128,
38033 b: __m128,
38034) -> __m128 {
38035 unsafe {
38036 static_assert_rounding!(ROUNDING);
38037 let a: f32x4 = a.as_f32x4();
38038 let b: f32x4 = b.as_f32x4();
38039 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vaddss(a, b, src, k, ROUNDING);
        transmute(r)
38042 }
38043}
38044
38045/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38046///
38047/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38048/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38049/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38050/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38051/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38052/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38053///
38054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_ss&expand=153)
38055#[inline]
38056#[target_feature(enable = "avx512f")]
38057#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38058#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
38059#[rustc_legacy_const_generics(3)]
38060pub fn _mm_maskz_add_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38061 unsafe {
38062 static_assert_rounding!(ROUNDING);
38063 let a: f32x4 = a.as_f32x4();
38064 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vaddss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
38067 }
38068}
38069
38070/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38071///
38072/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38073/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38074/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38075/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38076/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38077/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38078///
38079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_sd&expand=148)
38080#[inline]
38081#[target_feature(enable = "avx512f")]
38082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38083#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38084#[rustc_legacy_const_generics(2)]
38085pub fn _mm_add_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38086 unsafe {
38087 static_assert_rounding!(ROUNDING);
38088 let a: f64x2 = a.as_f64x2();
38089 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vaddsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
38092 }
38093}
38094
38095/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38096///
38097/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38098/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38099/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38100/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38101/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38102/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38103///
38104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_sd&expand=149)
38105#[inline]
38106#[target_feature(enable = "avx512f")]
38107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38108#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38109#[rustc_legacy_const_generics(4)]
38110pub fn _mm_mask_add_round_sd<const ROUNDING: i32>(
38111 src: __m128d,
38112 k: __mmask8,
38113 a: __m128d,
38114 b: __m128d,
38115) -> __m128d {
38116 unsafe {
38117 static_assert_rounding!(ROUNDING);
38118 let a: f64x2 = a.as_f64x2();
38119 let b: f64x2 = b.as_f64x2();
38120 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vaddsd(a, b, src, k, ROUNDING);
        transmute(r)
38123 }
38124}
38125
38126/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38127///
38128/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38129/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38130/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38131/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38132/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38133/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38134///
38135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_sd&expand=150)
38136#[inline]
38137#[target_feature(enable = "avx512f")]
38138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38139#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38140#[rustc_legacy_const_generics(3)]
38141pub fn _mm_maskz_add_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38142 unsafe {
38143 static_assert_rounding!(ROUNDING);
38144 let a: f64x2 = a.as_f64x2();
38145 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vaddsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
38148 }
38149}
38150
38151/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38152///
38153/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38154/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38155/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38156/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38157/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38158/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38159///
38160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_ss&expand=5745)
38161#[inline]
38162#[target_feature(enable = "avx512f")]
38163#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38164#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38165#[rustc_legacy_const_generics(2)]
38166pub fn _mm_sub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38167 unsafe {
38168 static_assert_rounding!(ROUNDING);
38169 let a: f32x4 = a.as_f32x4();
38170 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vsubss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
38173 }
38174}
38175
38176/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38177///
38178/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38179/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38180/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38181/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38182/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38183/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38184///
38185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_ss&expand=5743)
38186#[inline]
38187#[target_feature(enable = "avx512f")]
38188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38189#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38190#[rustc_legacy_const_generics(4)]
38191pub fn _mm_mask_sub_round_ss<const ROUNDING: i32>(
38192 src: __m128,
38193 k: __mmask8,
38194 a: __m128,
38195 b: __m128,
38196) -> __m128 {
38197 unsafe {
38198 static_assert_rounding!(ROUNDING);
38199 let a: f32x4 = a.as_f32x4();
38200 let b: f32x4 = b.as_f32x4();
38201 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vsubss(a, b, src, k, ROUNDING);
        transmute(r)
38204 }
38205}
38206
38207/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38208///
38209/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38210/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38211/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38212/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38213/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38214/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38215///
38216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_ss&expand=5744)
38217#[inline]
38218#[target_feature(enable = "avx512f")]
38219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38220#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38221#[rustc_legacy_const_generics(3)]
38222pub fn _mm_maskz_sub_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38223 unsafe {
38224 static_assert_rounding!(ROUNDING);
38225 let a: f32x4 = a.as_f32x4();
38226 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vsubss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
38229 }
38230}
38231
38232/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38233///
38234/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38235/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38236/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38237/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38238/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38239/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38240///
38241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_sd&expand=5742)
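///
/// A minimal usage sketch for the double-precision variant (illustrative only;
/// assumes `avx512f` support has been confirmed beforehand):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() -> f64 {
///     let a = _mm_set_sd(1.0);
///     let b = _mm_set_sd(3.0);
///     // Low lane: 1.0 - 3.0 = -2.0; the upper lane is copied from `a`.
///     let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
///     _mm_cvtsd_f64(r) // -2.0
/// }
/// ```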
38242#[inline]
38243#[target_feature(enable = "avx512f")]
38244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38245#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38246#[rustc_legacy_const_generics(2)]
38247pub fn _mm_sub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38248 unsafe {
38249 static_assert_rounding!(ROUNDING);
38250 let a: f64x2 = a.as_f64x2();
38251 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vsubsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
38254 }
38255}
38256
38257/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38258///
38259/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38260/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38261/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38262/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38263/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38264/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38265///
38266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_sd&expand=5740)
38267#[inline]
38268#[target_feature(enable = "avx512f")]
38269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38270#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38271#[rustc_legacy_const_generics(4)]
38272pub fn _mm_mask_sub_round_sd<const ROUNDING: i32>(
38273 src: __m128d,
38274 k: __mmask8,
38275 a: __m128d,
38276 b: __m128d,
38277) -> __m128d {
38278 unsafe {
38279 static_assert_rounding!(ROUNDING);
38280 let a: f64x2 = a.as_f64x2();
38281 let b: f64x2 = b.as_f64x2();
38282 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vsubsd(a, b, src, k, ROUNDING);
        transmute(r)
38285 }
38286}
38287
38288/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38289///
38290/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38291/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38292/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38293/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38294/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38295/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38296///
38297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_sd&expand=5741)
38298#[inline]
38299#[target_feature(enable = "avx512f")]
38300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38301#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38302#[rustc_legacy_const_generics(3)]
38303pub fn _mm_maskz_sub_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38304 unsafe {
38305 static_assert_rounding!(ROUNDING);
38306 let a: f64x2 = a.as_f64x2();
38307 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vsubsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
38310 }
38311}
38312
38313/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38314///
38315/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38316/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38317/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38318/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38319/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38320/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38321///
38322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_ss&expand=3946)
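///
/// A minimal usage sketch (illustrative only; assumes `avx512f` support has
/// been confirmed beforehand):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() -> f32 {
///     let a = _mm_set_ss(3.0);
///     let b = _mm_set_ss(0.5);
///     // Low lane: 3.0 * 0.5 = 1.5; upper lanes are copied from `a`.
///     let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
///     _mm_cvtss_f32(r) // 1.5
/// }
/// ```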
38323#[inline]
38324#[target_feature(enable = "avx512f")]
38325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38326#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38327#[rustc_legacy_const_generics(2)]
38328pub fn _mm_mul_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38329 unsafe {
38330 static_assert_rounding!(ROUNDING);
38331 let a: f32x4 = a.as_f32x4();
38332 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vmulss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
38335 }
38336}
38337
38338/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38339///
38340/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38341/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38342/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38343/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38344/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38345/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38346///
38347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_ss&expand=3944)
38348#[inline]
38349#[target_feature(enable = "avx512f")]
38350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38351#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38352#[rustc_legacy_const_generics(4)]
38353pub fn _mm_mask_mul_round_ss<const ROUNDING: i32>(
38354 src: __m128,
38355 k: __mmask8,
38356 a: __m128,
38357 b: __m128,
38358) -> __m128 {
38359 unsafe {
38360 static_assert_rounding!(ROUNDING);
38361 let a: f32x4 = a.as_f32x4();
38362 let b: f32x4 = b.as_f32x4();
38363 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vmulss(a, b, src, k, ROUNDING);
        transmute(r)
38366 }
38367}
38368
38369/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38370///
38371/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38372/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38373/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38374/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38375/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38376/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38377///
38378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_ss&expand=3945)
38379#[inline]
38380#[target_feature(enable = "avx512f")]
38381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38382#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38383#[rustc_legacy_const_generics(3)]
38384pub fn _mm_maskz_mul_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38385 unsafe {
38386 static_assert_rounding!(ROUNDING);
38387 let a: f32x4 = a.as_f32x4();
38388 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vmulss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
38391 }
38392}
38393
38394/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38395///
38396/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38397/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38398/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38399/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38400/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38401/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38402///
38403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_sd&expand=3943)
38404#[inline]
38405#[target_feature(enable = "avx512f")]
38406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38407#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38408#[rustc_legacy_const_generics(2)]
38409pub fn _mm_mul_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38410 unsafe {
38411 static_assert_rounding!(ROUNDING);
38412 let a: f64x2 = a.as_f64x2();
38413 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vmulsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
38416 }
38417}
38418
38419/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38420///
38421/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38422/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38423/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38424/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38425/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38426/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38427///
38428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_sd&expand=3941)
38429#[inline]
38430#[target_feature(enable = "avx512f")]
38431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38432#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38433#[rustc_legacy_const_generics(4)]
38434pub fn _mm_mask_mul_round_sd<const ROUNDING: i32>(
38435 src: __m128d,
38436 k: __mmask8,
38437 a: __m128d,
38438 b: __m128d,
38439) -> __m128d {
38440 unsafe {
38441 static_assert_rounding!(ROUNDING);
38442 let a: f64x2 = a.as_f64x2();
38443 let b: f64x2 = b.as_f64x2();
38444 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vmulsd(a, b, src, k, ROUNDING);
        transmute(r)
38447 }
38448}
38449
38450/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38451///
38452/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38453/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38454/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38455/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38456/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38457/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38458///
38459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_sd&expand=3942)
38460#[inline]
38461#[target_feature(enable = "avx512f")]
38462#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38463#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38464#[rustc_legacy_const_generics(3)]
38465pub fn _mm_maskz_mul_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38466 unsafe {
38467 static_assert_rounding!(ROUNDING);
38468 let a: f64x2 = a.as_f64x2();
38469 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vmulsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
38472 }
38473}
38474
38475/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38476///
38477/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38478/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38479/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38480/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38481/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38482/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38483///
38484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_ss&expand=2174)
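///
/// A minimal usage sketch showing how the rounding mode affects an inexact
/// quotient (illustrative only; assumes `avx512f` support has been confirmed
/// beforehand):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() -> bool {
///     let a = _mm_set_ss(1.0);
///     let b = _mm_set_ss(3.0);
///     // 1.0 / 3.0 is inexact, so rounding up and rounding down differ by one ULP.
///     let up = _mm_div_round_ss::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
///     let down = _mm_div_round_ss::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
///     _mm_cvtss_f32(up) > _mm_cvtss_f32(down) // true
/// }
/// ```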
38485#[inline]
38486#[target_feature(enable = "avx512f")]
38487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38488#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38489#[rustc_legacy_const_generics(2)]
38490pub fn _mm_div_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38491 unsafe {
38492 static_assert_rounding!(ROUNDING);
38493 let a: f32x4 = a.as_f32x4();
38494 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vdivss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
38497 }
38498}
38499
38500/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38501///
38502/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38503/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38504/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38505/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38506/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38507/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38508///
38509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_ss&expand=2175)
38510#[inline]
38511#[target_feature(enable = "avx512f")]
38512#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38513#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38514#[rustc_legacy_const_generics(4)]
38515pub fn _mm_mask_div_round_ss<const ROUNDING: i32>(
38516 src: __m128,
38517 k: __mmask8,
38518 a: __m128,
38519 b: __m128,
38520) -> __m128 {
38521 unsafe {
38522 static_assert_rounding!(ROUNDING);
38523 let a: f32x4 = a.as_f32x4();
38524 let b: f32x4 = b.as_f32x4();
38525 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vdivss(a, b, src, k, ROUNDING);
        transmute(r)
38528 }
38529}
38530
38531/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38532///
38533/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38534/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38535/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38536/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38537/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38538/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38539///
38540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_ss&expand=2176)
38541#[inline]
38542#[target_feature(enable = "avx512f")]
38543#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38544#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38545#[rustc_legacy_const_generics(3)]
38546pub fn _mm_maskz_div_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38547 unsafe {
38548 static_assert_rounding!(ROUNDING);
38549 let a: f32x4 = a.as_f32x4();
38550 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vdivss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
38553 }
38554}
38555
38556/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38557///
38558/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38559/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38560/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38561/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38562/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38563/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38564///
38565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_sd&expand=2171)
38566#[inline]
38567#[target_feature(enable = "avx512f")]
38568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38569#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38570#[rustc_legacy_const_generics(2)]
38571pub fn _mm_div_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38572 unsafe {
38573 static_assert_rounding!(ROUNDING);
38574 let a: f64x2 = a.as_f64x2();
38575 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vdivsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
38578 }
38579}
38580
38581/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38582///
38583/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38584/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38585/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38586/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38587/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38588/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38589///
38590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_sd&expand=2172)
38591#[inline]
38592#[target_feature(enable = "avx512f")]
38593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38594#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38595#[rustc_legacy_const_generics(4)]
38596pub fn _mm_mask_div_round_sd<const ROUNDING: i32>(
38597 src: __m128d,
38598 k: __mmask8,
38599 a: __m128d,
38600 b: __m128d,
38601) -> __m128d {
38602 unsafe {
38603 static_assert_rounding!(ROUNDING);
38604 let a: f64x2 = a.as_f64x2();
38605 let b: f64x2 = b.as_f64x2();
38606 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vdivsd(a, b, src, k, ROUNDING);
        transmute(r)
38609 }
38610}
38611
38612/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38613///
38614/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38615/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38616/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38617/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38618/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38619/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38620///
38621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_sd&expand=2173)
38622#[inline]
38623#[target_feature(enable = "avx512f")]
38624#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38625#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38626#[rustc_legacy_const_generics(3)]
38627pub fn _mm_maskz_div_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38628 unsafe {
38629 static_assert_rounding!(ROUNDING);
38630 let a: f64x2 = a.as_f64x2();
38631 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vdivsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
38634 }
38635}
38636
38637/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38638/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38639///
38640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_ss&expand=3668)
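///
/// A minimal usage sketch (illustrative only; assumes `avx512f` support has
/// been confirmed beforehand):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() -> f32 {
///     let a = _mm_set_ss(-1.0);
///     let b = _mm_set_ss(2.0);
///     // Low lane: max(-1.0, 2.0) = 2.0 with exceptions suppressed; upper lanes from `a`.
///     let r = _mm_max_round_ss::<_MM_FROUND_NO_EXC>(a, b);
///     _mm_cvtss_f32(r) // 2.0
/// }
/// ```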
38641#[inline]
38642#[target_feature(enable = "avx512f")]
38643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38644#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38645#[rustc_legacy_const_generics(2)]
38646pub fn _mm_max_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38647 unsafe {
38648 static_assert_sae!(SAE);
38649 let a: f32x4 = a.as_f32x4();
38650 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vmaxss(a, b, f32x4::ZERO, 0b1, SAE);
        transmute(r)
38653 }
38654}
38655
38656/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38657/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38658///
38659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_round_ss&expand=3672)
38660#[inline]
38661#[target_feature(enable = "avx512f")]
38662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38663#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38664#[rustc_legacy_const_generics(4)]
38665pub fn _mm_mask_max_round_ss<const SAE: i32>(
38666 src: __m128,
38667 k: __mmask8,
38668 a: __m128,
38669 b: __m128,
38670) -> __m128 {
38671 unsafe {
38672 static_assert_sae!(SAE);
38673 let a: f32x4 = a.as_f32x4();
38674 let b: f32x4 = b.as_f32x4();
38675 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vmaxss(a, b, src, k, SAE);
        transmute(r)
38678 }
38679}
38680
38681/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38682/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38683///
38684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_round_ss&expand=3667)
38685#[inline]
38686#[target_feature(enable = "avx512f")]
38687#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38688#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38689#[rustc_legacy_const_generics(3)]
38690pub fn _mm_maskz_max_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38691 unsafe {
38692 static_assert_sae!(SAE);
38693 let a: f32x4 = a.as_f32x4();
38694 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vmaxss(a, b, f32x4::ZERO, k, SAE);
        transmute(r)
38697 }
38698}
38699
38700/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38701/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38702///
38703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_sd&expand=3665)
38704#[inline]
38705#[target_feature(enable = "avx512f")]
38706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38707#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38708#[rustc_legacy_const_generics(2)]
38709pub fn _mm_max_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
38710 unsafe {
38711 static_assert_sae!(SAE);
38712 let a: f64x2 = a.as_f64x2();
38713 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vmaxsd(a, b, f64x2::ZERO, 0b1, SAE);
        transmute(r)
38716 }
38717}
38718
38719/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38720/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38721///
38722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_round_sd&expand=3663)
38723#[inline]
38724#[target_feature(enable = "avx512f")]
38725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38726#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38727#[rustc_legacy_const_generics(4)]
38728pub fn _mm_mask_max_round_sd<const SAE: i32>(
38729 src: __m128d,
38730 k: __mmask8,
38731 a: __m128d,
38732 b: __m128d,
38733) -> __m128d {
38734 unsafe {
38735 static_assert_sae!(SAE);
38736 let a: f64x2 = a.as_f64x2();
38737 let b: f64x2 = b.as_f64x2();
38738 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vmaxsd(a, b, src, k, SAE);
        transmute(r)
38741 }
38742}
38743
38744/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38745/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38746///
38747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_round_sd&expand=3670)
38748#[inline]
38749#[target_feature(enable = "avx512f")]
38750#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38751#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38752#[rustc_legacy_const_generics(3)]
38753pub fn _mm_maskz_max_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38754 unsafe {
38755 static_assert_sae!(SAE);
38756 let a: f64x2 = a.as_f64x2();
38757 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vmaxsd(a, b, f64x2::ZERO, k, SAE);
        transmute(r)
38760 }
38761}
38762
38763/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38764/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38765///
38766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_ss&expand=3782)
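///
/// A minimal usage sketch (illustrative only; assumes `avx512f` support has
/// been confirmed beforehand):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() -> f32 {
///     let a = _mm_set_ss(-1.0);
///     let b = _mm_set_ss(2.0);
///     // Low lane: min(-1.0, 2.0) = -1.0, using the current MXCSR exception behaviour.
///     let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
///     _mm_cvtss_f32(r) // -1.0
/// }
/// ```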
38767#[inline]
38768#[target_feature(enable = "avx512f")]
38769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38770#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38771#[rustc_legacy_const_generics(2)]
38772pub fn _mm_min_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38773 unsafe {
38774 static_assert_sae!(SAE);
38775 let a: f32x4 = a.as_f32x4();
38776 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vminss(a, b, f32x4::ZERO, 0b1, SAE);
        transmute(r)
38779 }
38780}
38781
38782/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38783/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38784///
38785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_ss&expand=3780)
38786#[inline]
38787#[target_feature(enable = "avx512f")]
38788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38789#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38790#[rustc_legacy_const_generics(4)]
38791pub fn _mm_mask_min_round_ss<const SAE: i32>(
38792 src: __m128,
38793 k: __mmask8,
38794 a: __m128,
38795 b: __m128,
38796) -> __m128 {
38797 unsafe {
38798 static_assert_sae!(SAE);
38799 let a: f32x4 = a.as_f32x4();
38800 let b: f32x4 = b.as_f32x4();
38801 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vminss(a, b, src, k, SAE);
        transmute(r)
38804 }
38805}
38806
38807/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38808/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38809///
38810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_ss&expand=3781)
38811#[inline]
38812#[target_feature(enable = "avx512f")]
38813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38814#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38815#[rustc_legacy_const_generics(3)]
38816pub fn _mm_maskz_min_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38817 unsafe {
38818 static_assert_sae!(SAE);
38819 let a: f32x4 = a.as_f32x4();
38820 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vminss(a, b, f32x4::ZERO, k, SAE);
        transmute(r)
38823 }
38824}
38825
/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38827/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38828///
38829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_sd&expand=3779)
38830#[inline]
38831#[target_feature(enable = "avx512f")]
38832#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38833#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38834#[rustc_legacy_const_generics(2)]
38835pub fn _mm_min_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
38836 unsafe {
38837 static_assert_sae!(SAE);
38838 let a: f64x2 = a.as_f64x2();
38839 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vminsd(a, b, f64x2::ZERO, 0b1, SAE);
        transmute(r)
38842 }
38843}
38844
38845/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38846/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38847///
38848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_sd&expand=3777)
38849#[inline]
38850#[target_feature(enable = "avx512f")]
38851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38852#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38853#[rustc_legacy_const_generics(4)]
38854pub fn _mm_mask_min_round_sd<const SAE: i32>(
38855 src: __m128d,
38856 k: __mmask8,
38857 a: __m128d,
38858 b: __m128d,
38859) -> __m128d {
38860 unsafe {
38861 static_assert_sae!(SAE);
38862 let a: f64x2 = a.as_f64x2();
38863 let b: f64x2 = b.as_f64x2();
38864 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vminsd(a, b, src, k, SAE);
        transmute(r)
38867 }
38868}
38869
38870/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38871/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38872///
38873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_sd&expand=3778)
38874#[inline]
38875#[target_feature(enable = "avx512f")]
38876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38877#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38878#[rustc_legacy_const_generics(3)]
38879pub fn _mm_maskz_min_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38880 unsafe {
38881 static_assert_sae!(SAE);
38882 let a: f64x2 = a.as_f64x2();
38883 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vminsd(a, b, f64x2::ZERO, k, SAE);
        transmute(r)
38886 }
38887}
38888
38889/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38890///
38891/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38892/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38893/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38894/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38895/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38896/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38897///
38898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_ss&expand=5383)
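///
/// A minimal usage sketch (illustrative only; assumes `avx512f` support has
/// been confirmed beforehand). Note that the square root is taken from the low
/// element of `b`, while the upper lanes come from `a`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() -> f32 {
///     let a = _mm_set_ss(1.0);
///     let b = _mm_set_ss(9.0);
///     // Low lane: sqrt(9.0) = 3.0 rounded to nearest; upper lanes are copied from `a`.
///     let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
///     _mm_cvtss_f32(r) // 3.0
/// }
/// ```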
38899#[inline]
38900#[target_feature(enable = "avx512f")]
38901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38902#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38903#[rustc_legacy_const_generics(2)]
38904pub fn _mm_sqrt_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38905 unsafe {
38906 static_assert_rounding!(ROUNDING);
        vsqrtss(a, b, _mm_setzero_ps(), 0b1, ROUNDING)
38908 }
38909}
38910
38911/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38912///
38913/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38914/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38915/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38916/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38917/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38918/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38919///
38920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_ss&expand=5381)
38921#[inline]
38922#[target_feature(enable = "avx512f")]
38923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38924#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38925#[rustc_legacy_const_generics(4)]
38926pub fn _mm_mask_sqrt_round_ss<const ROUNDING: i32>(
38927 src: __m128,
38928 k: __mmask8,
38929 a: __m128,
38930 b: __m128,
38931) -> __m128 {
38932 unsafe {
38933 static_assert_rounding!(ROUNDING);
        vsqrtss(a, b, src, k, ROUNDING)
38935 }
38936}
38937
38938/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38939///
38940/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38941/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38942/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38943/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38944/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38945/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38946///
38947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_ss&expand=5382)
38948#[inline]
38949#[target_feature(enable = "avx512f")]
38950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38951#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38952#[rustc_legacy_const_generics(3)]
38953pub fn _mm_maskz_sqrt_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38954 unsafe {
38955 static_assert_rounding!(ROUNDING);
        vsqrtss(a, b, _mm_setzero_ps(), k, ROUNDING)
38957 }
38958}
38959
38960/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38961///
38962/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38963/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38964/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38965/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38966/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38967/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38968///
38969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_sd&expand=5380)
38970#[inline]
38971#[target_feature(enable = "avx512f")]
38972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38973#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38974#[rustc_legacy_const_generics(2)]
38975pub fn _mm_sqrt_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38976 unsafe {
38977 static_assert_rounding!(ROUNDING);
        vsqrtsd(a, b, _mm_setzero_pd(), 0b1, ROUNDING)
38979 }
38980}
38981
38982/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38983///
38984/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38985/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38986/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38987/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38988/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38989/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38990///
38991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_sd&expand=5378)
38992#[inline]
38993#[target_feature(enable = "avx512f")]
38994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38995#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38996#[rustc_legacy_const_generics(4)]
38997pub fn _mm_mask_sqrt_round_sd<const ROUNDING: i32>(
38998 src: __m128d,
38999 k: __mmask8,
39000 a: __m128d,
39001 b: __m128d,
39002) -> __m128d {
39003 unsafe {
39004 static_assert_rounding!(ROUNDING);
        vsqrtsd(a, b, src, k, ROUNDING)
39006 }
39007}
39008
39009/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39010///
39011/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39012/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39013/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39014/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39015/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39016/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39017///
39018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_sd&expand=5379)
39019#[inline]
39020#[target_feature(enable = "avx512f")]
39021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39022#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
39023#[rustc_legacy_const_generics(3)]
39024pub fn _mm_maskz_sqrt_round_sd<const ROUNDING: i32>(
39025 k: __mmask8,
39026 a: __m128d,
39027 b: __m128d,
39028) -> __m128d {
39029 unsafe {
39030 static_assert_rounding!(ROUNDING);
        vsqrtsd(a, b, _mm_setzero_pd(), k, ROUNDING)
39032 }
39033}
39034
39035/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39036/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39037///
39038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_ss&expand=2856)
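///
/// A minimal usage sketch (illustrative only; assumes `avx512f` support has
/// been confirmed beforehand):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() -> f32 {
///     let a = _mm_set_ss(1.0);
///     let b = _mm_set_ss(8.0);
///     // Low lane: floor(log2(8.0)) = 3.0, returned as an f32; upper lanes come from `a`.
///     let r = _mm_getexp_round_ss::<_MM_FROUND_NO_EXC>(a, b);
///     _mm_cvtss_f32(r) // 3.0
/// }
/// ```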
39039#[inline]
39040#[target_feature(enable = "avx512f")]
39041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39042#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
39043#[rustc_legacy_const_generics(2)]
39044pub fn _mm_getexp_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
39045 unsafe {
39046 static_assert_sae!(SAE);
39047 let a: f32x4 = a.as_f32x4();
39048 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vgetexpss(a, b, f32x4::ZERO, 0b1, SAE);
        transmute(r)
39051 }
39052}
39053
39054/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39055/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39056///
39057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_ss&expand=2857)
39058#[inline]
39059#[target_feature(enable = "avx512f")]
39060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39061#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
39062#[rustc_legacy_const_generics(4)]
39063pub fn _mm_mask_getexp_round_ss<const SAE: i32>(
39064 src: __m128,
39065 k: __mmask8,
39066 a: __m128,
39067 b: __m128,
39068) -> __m128 {
39069 unsafe {
39070 static_assert_sae!(SAE);
39071 let a: f32x4 = a.as_f32x4();
39072 let b: f32x4 = b.as_f32x4();
39073 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vgetexpss(a, b, src, k, SAE);
        transmute(r)
39076 }
39077}
39078
39079/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39080/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39081///
39082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_ss&expand=2858)
39083#[inline]
39084#[target_feature(enable = "avx512f")]
39085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39086#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
39087#[rustc_legacy_const_generics(3)]
39088pub fn _mm_maskz_getexp_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39089 unsafe {
39090 static_assert_sae!(SAE);
39091 let a: f32x4 = a.as_f32x4();
39092 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vgetexpss(a, b, f32x4::ZERO, k, SAE);
        transmute(r)
39095 }
39096}
39097
39098/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39099/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39100///
39101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_sd&expand=2853)
39102#[inline]
39103#[target_feature(enable = "avx512f")]
39104#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39105#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39106#[rustc_legacy_const_generics(2)]
39107pub fn _mm_getexp_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
39108 unsafe {
39109 static_assert_sae!(SAE);
39110 let a: f64x2 = a.as_f64x2();
39111 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetexpsd(a, b, f64x2::ZERO, 0b1, SAE);
        transmute(r)
39114 }
39115}
39116
39117/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39118/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39119///
39120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_sd&expand=2854)
39121#[inline]
39122#[target_feature(enable = "avx512f")]
39123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39124#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39125#[rustc_legacy_const_generics(4)]
39126pub fn _mm_mask_getexp_round_sd<const SAE: i32>(
39127 src: __m128d,
39128 k: __mmask8,
39129 a: __m128d,
39130 b: __m128d,
39131) -> __m128d {
39132 unsafe {
39133 static_assert_sae!(SAE);
39134 let a: f64x2 = a.as_f64x2();
39135 let b: f64x2 = b.as_f64x2();
39136 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vgetexpsd(a, b, src, k, SAE);
        transmute(r)
39139 }
39140}
39141
39142/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39143/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39144///
39145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_sd&expand=2855)
39146#[inline]
39147#[target_feature(enable = "avx512f")]
39148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39149#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39150#[rustc_legacy_const_generics(3)]
39151pub fn _mm_maskz_getexp_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
39152 unsafe {
39153 static_assert_sae!(SAE);
39154 let a: f64x2 = a.as_f64x2();
39155 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetexpsd(a, b, f64x2::ZERO, k, SAE);
        transmute(r)
39158 }
39159}
39160
39161/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39162/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39163/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39164/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39165/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39166/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39167/// The sign is determined by sc which can take the following values:\
39168/// _MM_MANT_SIGN_src // sign = sign(src)\
39169/// _MM_MANT_SIGN_zero // sign = 0\
39170/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39171/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39172///
39173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_ss&expand=2892)
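///
/// A minimal usage sketch (illustrative only; assumes `avx512f` support has
/// been confirmed beforehand and uses the Rust spellings of the interval and
/// sign constants, `_MM_MANT_NORM_1_2` and `_MM_MANT_SIGN_SRC`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() -> f32 {
///     let a = _mm_set_ss(1.0);
///     let b = _mm_set_ss(10.0);
///     // 10.0 = 1.25 * 2^3, so normalizing the mantissa to [1, 2) with the source sign gives 1.25.
///     let r = _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
///         a, b,
///     );
///     _mm_cvtss_f32(r) // 1.25
/// }
/// ```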
39174#[inline]
39175#[target_feature(enable = "avx512f")]
39176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39177#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39178#[rustc_legacy_const_generics(2, 3, 4)]
39179pub fn _mm_getmant_round_ss<
39180 const NORM: _MM_MANTISSA_NORM_ENUM,
39181 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39182 const SAE: i32,
39183>(
39184 a: __m128,
39185 b: __m128,
39186) -> __m128 {
39187 unsafe {
39188 static_assert_uimm_bits!(NORM, 4);
39189 static_assert_uimm_bits!(SIGN, 2);
39190 static_assert_mantissas_sae!(SAE);
39191 let a: f32x4 = a.as_f32x4();
39192 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, 0b1, SAE);
        transmute(r)
39195 }
39196}
39197
39198/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39199/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39200/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39201/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39202/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39203/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39204/// The sign is determined by sc which can take the following values:\
39205/// _MM_MANT_SIGN_src // sign = sign(src)\
39206/// _MM_MANT_SIGN_zero // sign = 0\
39207/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39208/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39209///
39210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_ss&expand=2893)
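///
/// A minimal usage sketch of the writemask behavior (illustrative only; assumes an
/// `avx512f`-enabled caller, hence the `ignore` block):
///
/// ```ignore
/// let src = _mm_set_ss(42.0);
/// let a = _mm_setzero_ps();
/// let b = _mm_set_ss(10.0);
/// // Mask bit 0 set: the lower lane holds the normalized mantissa of 10.0, i.e. 1.25.
/// let r = _mm_mask_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC>(src, 0b1, a, b);
/// // Mask bit 0 clear: the lower lane is copied from `src` (42.0) instead.
/// let s = _mm_mask_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC>(src, 0b0, a, b);
/// ```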
39211#[inline]
39212#[target_feature(enable = "avx512f")]
39213#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39214#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39215#[rustc_legacy_const_generics(4, 5, 6)]
39216pub fn _mm_mask_getmant_round_ss<
39217 const NORM: _MM_MANTISSA_NORM_ENUM,
39218 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39219 const SAE: i32,
39220>(
39221 src: __m128,
39222 k: __mmask8,
39223 a: __m128,
39224 b: __m128,
39225) -> __m128 {
39226 unsafe {
39227 static_assert_uimm_bits!(NORM, 4);
39228 static_assert_uimm_bits!(SIGN, 2);
39229 static_assert_mantissas_sae!(SAE);
39230 let a: f32x4 = a.as_f32x4();
39231 let b: f32x4 = b.as_f32x4();
39232 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
39235 }
39236}
39237
39238/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39239/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39240/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39241/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39242/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39243/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39244/// The sign is determined by sc which can take the following values:\
39245/// _MM_MANT_SIGN_src // sign = sign(src)\
39246/// _MM_MANT_SIGN_zero // sign = 0\
39247/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39248/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39249///
39250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_ss&expand=2894)
39251#[inline]
39252#[target_feature(enable = "avx512f")]
39253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39254#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39255#[rustc_legacy_const_generics(3, 4, 5)]
39256pub fn _mm_maskz_getmant_round_ss<
39257 const NORM: _MM_MANTISSA_NORM_ENUM,
39258 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39259 const SAE: i32,
39260>(
39261 k: __mmask8,
39262 a: __m128,
39263 b: __m128,
39264) -> __m128 {
39265 unsafe {
39266 static_assert_uimm_bits!(NORM, 4);
39267 static_assert_uimm_bits!(SIGN, 2);
39268 static_assert_mantissas_sae!(SAE);
39269 let a: f32x4 = a.as_f32x4();
39270 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, k, SAE);
        transmute(r)
39273 }
39274}
39275
39276/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39277/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39278/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39279/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39280/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39281/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39282/// The sign is determined by sc which can take the following values:\
39283/// _MM_MANT_SIGN_src // sign = sign(src)\
39284/// _MM_MANT_SIGN_zero // sign = 0\
39285/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39286/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39287///
39288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_sd&expand=2889)
39289#[inline]
39290#[target_feature(enable = "avx512f")]
39291#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39292#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39293#[rustc_legacy_const_generics(2, 3, 4)]
39294pub fn _mm_getmant_round_sd<
39295 const NORM: _MM_MANTISSA_NORM_ENUM,
39296 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39297 const SAE: i32,
39298>(
39299 a: __m128d,
39300 b: __m128d,
39301) -> __m128d {
39302 unsafe {
39303 static_assert_uimm_bits!(NORM, 4);
39304 static_assert_uimm_bits!(SIGN, 2);
39305 static_assert_mantissas_sae!(SAE);
39306 let a: f64x2 = a.as_f64x2();
39307 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, 0b1, SAE);
        transmute(r)
39310 }
39311}
39312
39313/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39314/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39315/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39316/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39317/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39318/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39319/// The sign is determined by sc which can take the following values:\
39320/// _MM_MANT_SIGN_src // sign = sign(src)\
39321/// _MM_MANT_SIGN_zero // sign = 0\
39322/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39323/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39324///
39325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_sd&expand=2890)
39326#[inline]
39327#[target_feature(enable = "avx512f")]
39328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39329#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39330#[rustc_legacy_const_generics(4, 5, 6)]
39331pub fn _mm_mask_getmant_round_sd<
39332 const NORM: _MM_MANTISSA_NORM_ENUM,
39333 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39334 const SAE: i32,
39335>(
39336 src: __m128d,
39337 k: __mmask8,
39338 a: __m128d,
39339 b: __m128d,
39340) -> __m128d {
39341 unsafe {
39342 static_assert_uimm_bits!(NORM, 4);
39343 static_assert_uimm_bits!(SIGN, 2);
39344 static_assert_mantissas_sae!(SAE);
39345 let a: f64x2 = a.as_f64x2();
39346 let b: f64x2 = b.as_f64x2();
39347 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
39350 }
39351}
39352
39353/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39354/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39355/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39356/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39357/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39358/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39359/// The sign is determined by sc which can take the following values:\
39360/// _MM_MANT_SIGN_src // sign = sign(src)\
39361/// _MM_MANT_SIGN_zero // sign = 0\
39362/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39363/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39364///
39365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_sd&expand=2891)
39366#[inline]
39367#[target_feature(enable = "avx512f")]
39368#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39369#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39370#[rustc_legacy_const_generics(3, 4, 5)]
39371pub fn _mm_maskz_getmant_round_sd<
39372 const NORM: _MM_MANTISSA_NORM_ENUM,
39373 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39374 const SAE: i32,
39375>(
39376 k: __mmask8,
39377 a: __m128d,
39378 b: __m128d,
39379) -> __m128d {
39380 unsafe {
39381 static_assert_uimm_bits!(NORM, 4);
39382 static_assert_uimm_bits!(SIGN, 2);
39383 static_assert_mantissas_sae!(SAE);
39384 let a: f64x2 = a.as_f64x2();
39385 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, k, SAE);
        transmute(r)
39388 }
39389}
39390
39391/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39392/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39393/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39394/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39395/// * [`_MM_FROUND_TO_POS_INF`] : round up
39396/// * [`_MM_FROUND_TO_ZERO`] : truncate
39397/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39398///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_ss&expand=4796)
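///
/// A minimal usage sketch (illustrative only; assumes an `avx512f`-enabled caller, hence
/// the `ignore` block):
///
/// ```ignore
/// let a = _mm_set1_ps(2.0);
/// let b = _mm_set_ss(1.3);
/// // imm8 = 0: no fraction bits kept and round-to-nearest, so the lower lane becomes 1.0.
/// let r = _mm_roundscale_round_ss::<0, _MM_FROUND_NO_EXC>(a, b);
/// // imm8 = 0x10: keep one fraction bit, i.e. round to the nearest multiple of 0.5, giving 1.5.
/// let s = _mm_roundscale_round_ss::<0x10, _MM_FROUND_NO_EXC>(a, b);
/// ```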
39401#[inline]
39402#[target_feature(enable = "avx512f")]
39403#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39404#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39405#[rustc_legacy_const_generics(2, 3)]
39406pub fn _mm_roundscale_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
39407 unsafe {
39408 static_assert_uimm_bits!(IMM8, 8);
39409 static_assert_mantissas_sae!(SAE);
39410 let a: f32x4 = a.as_f32x4();
39411 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vrndscaless(a, b, f32x4::ZERO, 0b11111111, IMM8, SAE);
        transmute(r)
39414 }
39415}
39416
39417/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39418/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39419/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39420/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39421/// * [`_MM_FROUND_TO_POS_INF`] : round up
39422/// * [`_MM_FROUND_TO_ZERO`] : truncate
39423/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39424///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_ss&expand=4794)
39427#[inline]
39428#[target_feature(enable = "avx512f")]
39429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39430#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39431#[rustc_legacy_const_generics(4, 5)]
39432pub fn _mm_mask_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
39433 src: __m128,
39434 k: __mmask8,
39435 a: __m128,
39436 b: __m128,
39437) -> __m128 {
39438 unsafe {
39439 static_assert_uimm_bits!(IMM8, 8);
39440 static_assert_mantissas_sae!(SAE);
39441 let a: f32x4 = a.as_f32x4();
39442 let b: f32x4 = b.as_f32x4();
39443 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vrndscaless(a, b, src, k, IMM8, SAE);
        transmute(r)
39446 }
39447}
39448
39449/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39450/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39451/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39452/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39453/// * [`_MM_FROUND_TO_POS_INF`] : round up
39454/// * [`_MM_FROUND_TO_ZERO`] : truncate
39455/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39456///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_ss&expand=4795)
39459#[inline]
39460#[target_feature(enable = "avx512f")]
39461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39462#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39463#[rustc_legacy_const_generics(3, 4)]
39464pub fn _mm_maskz_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
39465 k: __mmask8,
39466 a: __m128,
39467 b: __m128,
39468) -> __m128 {
39469 unsafe {
39470 static_assert_uimm_bits!(IMM8, 8);
39471 static_assert_mantissas_sae!(SAE);
39472 let a: f32x4 = a.as_f32x4();
39473 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vrndscaless(a, b, f32x4::ZERO, k, IMM8, SAE);
        transmute(r)
39476 }
39477}
39478
39479/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39480/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39481/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39482/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39483/// * [`_MM_FROUND_TO_POS_INF`] : round up
39484/// * [`_MM_FROUND_TO_ZERO`] : truncate
39485/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39486///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_sd&expand=4793)
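///
/// A minimal usage sketch (illustrative only; assumes an `avx512f`-enabled caller, hence
/// the `ignore` block):
///
/// ```ignore
/// let a = _mm_set_pd(6.0, 0.0);
/// let b = _mm_set_sd(2.1);
/// // imm8[2:0] = _MM_FROUND_TO_POS_INF: the lower lane is rounded up to 3.0;
/// // the upper lane is 6.0, copied from `a`.
/// let r = _mm_roundscale_round_sd::<_MM_FROUND_TO_POS_INF, _MM_FROUND_NO_EXC>(a, b);
/// ```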
39489#[inline]
39490#[target_feature(enable = "avx512f")]
39491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39492#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39493#[rustc_legacy_const_generics(2, 3)]
39494pub fn _mm_roundscale_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
39495 unsafe {
39496 static_assert_uimm_bits!(IMM8, 8);
39497 static_assert_mantissas_sae!(SAE);
39498 let a: f64x2 = a.as_f64x2();
39499 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vrndscalesd(a, b, f64x2::ZERO, 0b11111111, IMM8, SAE);
        transmute(r)
39502 }
39503}
39504
39505/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39506/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39507/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39508/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39509/// * [`_MM_FROUND_TO_POS_INF`] : round up
39510/// * [`_MM_FROUND_TO_ZERO`] : truncate
39511/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39512///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_sd&expand=4791)
39515#[inline]
39516#[target_feature(enable = "avx512f")]
39517#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39518#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39519#[rustc_legacy_const_generics(4, 5)]
39520pub fn _mm_mask_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
39521 src: __m128d,
39522 k: __mmask8,
39523 a: __m128d,
39524 b: __m128d,
39525) -> __m128d {
39526 unsafe {
39527 static_assert_uimm_bits!(IMM8, 8);
39528 static_assert_mantissas_sae!(SAE);
39529 let a: f64x2 = a.as_f64x2();
39530 let b: f64x2 = b.as_f64x2();
39531 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vrndscalesd(a, b, src, k, IMM8, SAE);
        transmute(r)
39534 }
39535}
39536
39537/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39538/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39539/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39540/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39541/// * [`_MM_FROUND_TO_POS_INF`] : round up
39542/// * [`_MM_FROUND_TO_ZERO`] : truncate
39543/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39544///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_sd&expand=4792)
39547#[inline]
39548#[target_feature(enable = "avx512f")]
39549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39550#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39551#[rustc_legacy_const_generics(3, 4)]
39552pub fn _mm_maskz_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
39553 k: __mmask8,
39554 a: __m128d,
39555 b: __m128d,
39556) -> __m128d {
39557 unsafe {
39558 static_assert_uimm_bits!(IMM8, 8);
39559 static_assert_mantissas_sae!(SAE);
39560 let a: f64x2 = a.as_f64x2();
39561 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, SAE);
        transmute(r)
39564 }
39565}
39566
39567/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39568///
39569/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39570/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39571/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39572/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39573/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39574/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39575///
39576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_ss&expand=4895)
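///
/// A minimal usage sketch (illustrative only; assumes an `avx512f`-enabled caller, hence
/// the `ignore` block):
///
/// ```ignore
/// let a = _mm_set_ss(3.0);
/// let b = _mm_set_ss(2.0);
/// // Lower lane: 3.0 * 2^floor(2.0) = 12.0; the upper three lanes are copied from `a`.
/// let r = _mm_scalef_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
/// ```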
39577#[inline]
39578#[target_feature(enable = "avx512f")]
39579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39580#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39581#[rustc_legacy_const_generics(2)]
39582pub fn _mm_scalef_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
39583 unsafe {
39584 static_assert_rounding!(ROUNDING);
39585 let a: f32x4 = a.as_f32x4();
39586 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vscalefss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
        transmute(r)
39589 }
39590}
39591
39592/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39593///
39594/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39595/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39596/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39597/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39598/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39599/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39600///
39601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_ss&expand=4893)
39602#[inline]
39603#[target_feature(enable = "avx512f")]
39604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39605#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39606#[rustc_legacy_const_generics(4)]
39607pub fn _mm_mask_scalef_round_ss<const ROUNDING: i32>(
39608 src: __m128,
39609 k: __mmask8,
39610 a: __m128,
39611 b: __m128,
39612) -> __m128 {
39613 unsafe {
39614 static_assert_rounding!(ROUNDING);
39615 let a: f32x4 = a.as_f32x4();
39616 let b: f32x4 = b.as_f32x4();
39617 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vscalefss(a, b, src, k, ROUNDING);
        transmute(r)
39620 }
39621}
39622
39623/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39624///
39625/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39626/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39627/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39628/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39629/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39630/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39631///
39632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_ss&expand=4894)
39633#[inline]
39634#[target_feature(enable = "avx512f")]
39635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39636#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39637#[rustc_legacy_const_generics(3)]
39638pub fn _mm_maskz_scalef_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39639 unsafe {
39640 static_assert_rounding!(ROUNDING);
39641 let a: f32x4 = a.as_f32x4();
39642 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vscalefss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
39645 }
39646}
39647
39648/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39649///
39650/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39651/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39652/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39653/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39654/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39655/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39656///
39657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_sd&expand=4892)
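///
/// A minimal usage sketch (illustrative only; assumes an `avx512f`-enabled caller, hence
/// the `ignore` block):
///
/// ```ignore
/// let a = _mm_set_sd(1.5);
/// let b = _mm_set_sd(3.0);
/// // Lower lane: 1.5 * 2^floor(3.0) = 12.0; the upper lane is copied from `a`.
/// let r = _mm_scalef_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
/// ```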
39658#[inline]
39659#[target_feature(enable = "avx512f")]
39660#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39661#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39662#[rustc_legacy_const_generics(2)]
39663pub fn _mm_scalef_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
39664 unsafe {
39665 static_assert_rounding!(ROUNDING);
39666 let a: f64x2 = a.as_f64x2();
39667 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vscalefsd(a, b, f64x2::ZERO, 0b11111111, ROUNDING);
        transmute(r)
39670 }
39671}
39672
39673/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39674///
39675/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39676/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39677/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39678/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39679/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39680/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39681///
39682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_sd&expand=4890)
39683#[inline]
39684#[target_feature(enable = "avx512f")]
39685#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39686#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39687#[rustc_legacy_const_generics(4)]
39688pub fn _mm_mask_scalef_round_sd<const ROUNDING: i32>(
39689 src: __m128d,
39690 k: __mmask8,
39691 a: __m128d,
39692 b: __m128d,
39693) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a: f64x2 = a.as_f64x2();
        let b: f64x2 = b.as_f64x2();
        let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vscalefsd(a, b, src, k, ROUNDING);
        transmute(r)
39700 }
39701}
39702
39703/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39704///
39705/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39706/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39707/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39708/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39709/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39710/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39711///
39712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_sd&expand=4891)
39713#[inline]
39714#[target_feature(enable = "avx512f")]
39715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39716#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39717#[rustc_legacy_const_generics(3)]
39718pub fn _mm_maskz_scalef_round_sd<const ROUNDING: i32>(
39719 k: __mmask8,
39720 a: __m128d,
39721 b: __m128d,
39722) -> __m128d {
39723 unsafe {
39724 static_assert_rounding!(ROUNDING);
39725 let a: f64x2 = a.as_f64x2();
39726 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vscalefsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
39729 }
39730}
39731
39732/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39733///
39734/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39735/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39736/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39737/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39738/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39739/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39740///
39741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_ss&expand=2573)
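///
/// A minimal usage sketch (illustrative only; assumes an `avx512f`-enabled caller, hence
/// the `ignore` block):
///
/// ```ignore
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(1.0);
/// // Lower lane: 2.0 * 3.0 + 1.0 = 7.0, computed with a single rounding;
/// // the upper three lanes are copied from `a`.
/// let r = _mm_fmadd_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b, c);
/// ```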
39742#[inline]
39743#[target_feature(enable = "avx512f")]
39744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39745#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39746#[rustc_legacy_const_generics(3)]
39747pub fn _mm_fmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
39748 unsafe {
39749 static_assert_rounding!(ROUNDING);
39750 let extracta: f32 = simd_extract!(a, 0);
39751 let extractb: f32 = simd_extract!(b, 0);
39752 let extractc: f32 = simd_extract!(c, 0);
        let r: f32 = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39754 simd_insert!(a, 0, r)
39755 }
39756}
39757
39758/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39759///
39760/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39761/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39762/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39763/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39764/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39765/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39766///
39767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_ss&expand=2574)
39768#[inline]
39769#[target_feature(enable = "avx512f")]
39770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39771#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39772#[rustc_legacy_const_generics(4)]
39773pub fn _mm_mask_fmadd_round_ss<const ROUNDING: i32>(
39774 a: __m128,
39775 k: __mmask8,
39776 b: __m128,
39777 c: __m128,
39778) -> __m128 {
39779 unsafe {
39780 static_assert_rounding!(ROUNDING);
39781 let mut fmadd: f32 = simd_extract!(a, 0);
39782 if (k & 0b00000001) != 0 {
39783 let extractb: f32 = simd_extract!(b, 0);
39784 let extractc: f32 = simd_extract!(c, 0);
            fmadd = vfmaddssround(fmadd, extractb, extractc, ROUNDING);
39786 }
39787 simd_insert!(a, 0, fmadd)
39788 }
39789}
39790
39791/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39792///
39793/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39794/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39795/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39796/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39797/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39798/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39799///
39800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_ss&expand=2576)
39801#[inline]
39802#[target_feature(enable = "avx512f")]
39803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39804#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39805#[rustc_legacy_const_generics(4)]
39806pub fn _mm_maskz_fmadd_round_ss<const ROUNDING: i32>(
39807 k: __mmask8,
39808 a: __m128,
39809 b: __m128,
39810 c: __m128,
39811) -> __m128 {
39812 unsafe {
39813 static_assert_rounding!(ROUNDING);
39814 let mut fmadd: f32 = 0.;
39815 if (k & 0b00000001) != 0 {
39816 let extracta: f32 = simd_extract!(a, 0);
39817 let extractb: f32 = simd_extract!(b, 0);
39818 let extractc: f32 = simd_extract!(c, 0);
            fmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39820 }
39821 simd_insert!(a, 0, fmadd)
39822 }
39823}
39824
39825/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
39826///
39827/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39828/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39829/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39830/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39831/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39832/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39833///
39834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_ss&expand=2575)
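///
/// A minimal usage sketch of the mask3 behavior (illustrative only; assumes an
/// `avx512f`-enabled caller, hence the `ignore` block):
///
/// ```ignore
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(10.0);
/// // Mask bit 0 set: the lower lane becomes 2.0 * 3.0 + 10.0 = 16.0; upper lanes come from `c`.
/// let r = _mm_mask3_fmadd_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b, c, 0b1);
/// // Mask bit 0 clear: the lower lane keeps `c`'s value, 10.0.
/// let s = _mm_mask3_fmadd_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b, c, 0b0);
/// ```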
39835#[inline]
39836#[target_feature(enable = "avx512f")]
39837#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39838#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39839#[rustc_legacy_const_generics(4)]
39840pub fn _mm_mask3_fmadd_round_ss<const ROUNDING: i32>(
39841 a: __m128,
39842 b: __m128,
39843 c: __m128,
39844 k: __mmask8,
39845) -> __m128 {
39846 unsafe {
39847 static_assert_rounding!(ROUNDING);
39848 let mut fmadd: f32 = simd_extract!(c, 0);
39849 if (k & 0b00000001) != 0 {
39850 let extracta: f32 = simd_extract!(a, 0);
39851 let extractb: f32 = simd_extract!(b, 0);
            fmadd = vfmaddssround(extracta, extractb, fmadd, ROUNDING);
39853 }
39854 simd_insert!(c, 0, fmadd)
39855 }
39856}
39857
39858/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39859///
39860/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39861/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39862/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39863/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39864/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39865/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39866///
39867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_sd&expand=2569)
39868#[inline]
39869#[target_feature(enable = "avx512f")]
39870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39871#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39872#[rustc_legacy_const_generics(3)]
39873pub fn _mm_fmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39874 unsafe {
39875 static_assert_rounding!(ROUNDING);
39876 let extracta: f64 = simd_extract!(a, 0);
39877 let extractb: f64 = simd_extract!(b, 0);
39878 let extractc: f64 = simd_extract!(c, 0);
        let fmadd: f64 = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
39880 simd_insert!(a, 0, fmadd)
39881 }
39882}
39883
39884/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39885///
39886/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39887/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39888/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39889/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39890/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39891/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39892///
39893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_sd&expand=2570)
39894#[inline]
39895#[target_feature(enable = "avx512f")]
39896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39897#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39898#[rustc_legacy_const_generics(4)]
39899pub fn _mm_mask_fmadd_round_sd<const ROUNDING: i32>(
39900 a: __m128d,
39901 k: __mmask8,
39902 b: __m128d,
39903 c: __m128d,
39904) -> __m128d {
39905 unsafe {
39906 static_assert_rounding!(ROUNDING);
39907 let mut fmadd: f64 = simd_extract!(a, 0);
39908 if (k & 0b00000001) != 0 {
39909 let extractb: f64 = simd_extract!(b, 0);
39910 let extractc: f64 = simd_extract!(c, 0);
            fmadd = vfmaddsdround(fmadd, extractb, extractc, ROUNDING);
39912 }
39913 simd_insert!(a, 0, fmadd)
39914 }
39915}
39916
39917/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39918///
39919/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39920/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39921/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39922/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39923/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39924/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39925///
39926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_sd&expand=2572)
39927#[inline]
39928#[target_feature(enable = "avx512f")]
39929#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39930#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39931#[rustc_legacy_const_generics(4)]
39932pub fn _mm_maskz_fmadd_round_sd<const ROUNDING: i32>(
39933 k: __mmask8,
39934 a: __m128d,
39935 b: __m128d,
39936 c: __m128d,
39937) -> __m128d {
39938 unsafe {
39939 static_assert_rounding!(ROUNDING);
39940 let mut fmadd: f64 = 0.;
39941 if (k & 0b00000001) != 0 {
39942 let extracta: f64 = simd_extract!(a, 0);
39943 let extractb: f64 = simd_extract!(b, 0);
39944 let extractc: f64 = simd_extract!(c, 0);
            fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
39946 }
39947 simd_insert!(a, 0, fmadd)
39948 }
39949}
39950
39951/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
39952///
39953/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39954/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39955/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39956/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39957/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39958/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39959///
39960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_sd&expand=2571)
39961#[inline]
39962#[target_feature(enable = "avx512f")]
39963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39964#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39965#[rustc_legacy_const_generics(4)]
39966pub fn _mm_mask3_fmadd_round_sd<const ROUNDING: i32>(
39967 a: __m128d,
39968 b: __m128d,
39969 c: __m128d,
39970 k: __mmask8,
39971) -> __m128d {
39972 unsafe {
39973 static_assert_rounding!(ROUNDING);
39974 let mut fmadd: f64 = simd_extract!(c, 0);
39975 if (k & 0b00000001) != 0 {
39976 let extracta: f64 = simd_extract!(a, 0);
39977 let extractb: f64 = simd_extract!(b, 0);
            fmadd = vfmaddsdround(extracta, extractb, fmadd, ROUNDING);
39979 }
39980 simd_insert!(c, 0, fmadd)
39981 }
39982}
39983
39984/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39985///
39986/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39987/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39988/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39989/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39990/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39991/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39992///
39993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_ss&expand=2659)
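///
/// A minimal usage sketch (illustrative only; assumes an `avx512f`-enabled caller, hence
/// the `ignore` block):
///
/// ```ignore
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(1.0);
/// // Lower lane: 2.0 * 3.0 - 1.0 = 5.0; the upper three lanes are copied from `a`.
/// let r = _mm_fmsub_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b, c);
/// ```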
39994#[inline]
39995#[target_feature(enable = "avx512f")]
39996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39997#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
39998#[rustc_legacy_const_generics(3)]
39999pub fn _mm_fmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40000 unsafe {
40001 static_assert_rounding!(ROUNDING);
40002 let extracta: f32 = simd_extract!(a, 0);
40003 let extractb: f32 = simd_extract!(b, 0);
40004 let extractc: f32 = simd_extract!(c, 0);
40005 let extractc: f32 = -extractc;
        let fmsub: f32 = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40007 simd_insert!(a, 0, fmsub)
40008 }
40009}
40010
40011/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40012///
40013/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40014/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40015/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40016/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40017/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40018/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40019///
40020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_ss&expand=2660)
40021#[inline]
40022#[target_feature(enable = "avx512f")]
40023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40024#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40025#[rustc_legacy_const_generics(4)]
40026pub fn _mm_mask_fmsub_round_ss<const ROUNDING: i32>(
40027 a: __m128,
40028 k: __mmask8,
40029 b: __m128,
40030 c: __m128,
40031) -> __m128 {
40032 unsafe {
40033 static_assert_rounding!(ROUNDING);
40034 let mut fmsub: f32 = simd_extract!(a, 0);
40035 if (k & 0b00000001) != 0 {
40036 let extractb: f32 = simd_extract!(b, 0);
40037 let extractc: f32 = simd_extract!(c, 0);
40038 let extractc: f32 = -extractc;
            fmsub = vfmaddssround(fmsub, extractb, extractc, ROUNDING);
40040 }
40041 simd_insert!(a, 0, fmsub)
40042 }
40043}
40044
40045/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40046///
40047/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40048/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40049/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40050/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40051/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40052/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40053///
40054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_ss&expand=2662)
40055#[inline]
40056#[target_feature(enable = "avx512f")]
40057#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40058#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40059#[rustc_legacy_const_generics(4)]
40060pub fn _mm_maskz_fmsub_round_ss<const ROUNDING: i32>(
40061 k: __mmask8,
40062 a: __m128,
40063 b: __m128,
40064 c: __m128,
40065) -> __m128 {
40066 unsafe {
40067 static_assert_rounding!(ROUNDING);
40068 let mut fmsub: f32 = 0.;
40069 if (k & 0b00000001) != 0 {
40070 let extracta: f32 = simd_extract!(a, 0);
40071 let extractb: f32 = simd_extract!(b, 0);
40072 let extractc: f32 = simd_extract!(c, 0);
40073 let extractc: f32 = -extractc;
            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40075 }
40076 simd_insert!(a, 0, fmsub)
40077 }
40078}
40079
40080/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40081///
40082/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40083/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40084/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40085/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40086/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40087/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40088///
40089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_ss&expand=2661)
40090#[inline]
40091#[target_feature(enable = "avx512f")]
40092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40093#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40094#[rustc_legacy_const_generics(4)]
40095pub fn _mm_mask3_fmsub_round_ss<const ROUNDING: i32>(
40096 a: __m128,
40097 b: __m128,
40098 c: __m128,
40099 k: __mmask8,
40100) -> __m128 {
40101 unsafe {
40102 static_assert_rounding!(ROUNDING);
40103 let mut fmsub: f32 = simd_extract!(c, 0);
40104 if (k & 0b00000001) != 0 {
40105 let extracta: f32 = simd_extract!(a, 0);
40106 let extractb: f32 = simd_extract!(b, 0);
40107 let extractc: f32 = -fmsub;
            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40109 }
40110 simd_insert!(c, 0, fmsub)
40111 }
40112}
40113
40114/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40115///
40116/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40117/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40118/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40119/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40120/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40121/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40122///
40123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_sd&expand=2655)
40124#[inline]
40125#[target_feature(enable = "avx512f")]
40126#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40127#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40128#[rustc_legacy_const_generics(3)]
40129pub fn _mm_fmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40130 unsafe {
40131 static_assert_rounding!(ROUNDING);
40132 let extracta: f64 = simd_extract!(a, 0);
40133 let extractb: f64 = simd_extract!(b, 0);
40134 let extractc: f64 = simd_extract!(c, 0);
40135 let extractc: f64 = -extractc;
        let fmsub: f64 = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40137 simd_insert!(a, 0, fmsub)
40138 }
40139}
40140
40141/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40142///
40143/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40144/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40145/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40146/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40147/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40148/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40149///
40150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_sd&expand=2656)
40151#[inline]
40152#[target_feature(enable = "avx512f")]
40153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40154#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40155#[rustc_legacy_const_generics(4)]
40156pub fn _mm_mask_fmsub_round_sd<const ROUNDING: i32>(
40157 a: __m128d,
40158 k: __mmask8,
40159 b: __m128d,
40160 c: __m128d,
40161) -> __m128d {
40162 unsafe {
40163 static_assert_rounding!(ROUNDING);
40164 let mut fmsub: f64 = simd_extract!(a, 0);
40165 if (k & 0b00000001) != 0 {
40166 let extractb: f64 = simd_extract!(b, 0);
40167 let extractc: f64 = simd_extract!(c, 0);
40168 let extractc: f64 = -extractc;
            fmsub = vfmaddsdround(fmsub, extractb, extractc, ROUNDING);
40170 }
40171 simd_insert!(a, 0, fmsub)
40172 }
40173}
40174
40175/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40176///
40177/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40178/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40179/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40180/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40181/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40182/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40183///
40184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_sd&expand=2658)
40185#[inline]
40186#[target_feature(enable = "avx512f")]
40187#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40188#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40189#[rustc_legacy_const_generics(4)]
40190pub fn _mm_maskz_fmsub_round_sd<const ROUNDING: i32>(
40191 k: __mmask8,
40192 a: __m128d,
40193 b: __m128d,
40194 c: __m128d,
40195) -> __m128d {
40196 unsafe {
40197 static_assert_rounding!(ROUNDING);
40198 let mut fmsub: f64 = 0.;
40199 if (k & 0b00000001) != 0 {
40200 let extracta: f64 = simd_extract!(a, 0);
40201 let extractb: f64 = simd_extract!(b, 0);
40202 let extractc: f64 = simd_extract!(c, 0);
40203 let extractc: f64 = -extractc;
            fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40205 }
40206 simd_insert!(a, 0, fmsub)
40207 }
40208}
40209
40210/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40211///
40212/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40213/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40214/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40215/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40216/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40217/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40218///
40219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_sd&expand=2657)
40220#[inline]
40221#[target_feature(enable = "avx512f")]
40222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40223#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40224#[rustc_legacy_const_generics(4)]
40225pub fn _mm_mask3_fmsub_round_sd<const ROUNDING: i32>(
40226 a: __m128d,
40227 b: __m128d,
40228 c: __m128d,
40229 k: __mmask8,
40230) -> __m128d {
40231 unsafe {
40232 static_assert_rounding!(ROUNDING);
40233 let mut fmsub: f64 = simd_extract!(c, 0);
40234 if (k & 0b00000001) != 0 {
40235 let extracta: f64 = simd_extract!(a, 0);
40236 let extractb: f64 = simd_extract!(b, 0);
40237 let extractc: f64 = -fmsub;
40238 fmsub = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
40239 }
40240 simd_insert!(c, 0, fmsub)
40241 }
40242}
40243
40244/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40245///
40246/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40247/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40248/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40249/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40250/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40251/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40252///
40253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_ss&expand=2739)
40254#[inline]
40255#[target_feature(enable = "avx512f")]
40256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40257#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40258#[rustc_legacy_const_generics(3)]
40259pub fn _mm_fnmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40260 unsafe {
40261 static_assert_rounding!(ROUNDING);
40262 let extracta: f32 = simd_extract!(a, 0);
40263 let extracta: f32 = -extracta;
40264 let extractb: f32 = simd_extract!(b, 0);
40265 let extractc: f32 = simd_extract!(c, 0);
40266 let fnmadd: f32 = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
40267 simd_insert!(a, 0, fnmadd)
40268 }
40269}
40270
40271/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40272///
40273/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40274/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40275/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40276/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40277/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40278/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40279///
40280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_ss&expand=2740)
40281#[inline]
40282#[target_feature(enable = "avx512f")]
40283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40284#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40285#[rustc_legacy_const_generics(4)]
40286pub fn _mm_mask_fnmadd_round_ss<const ROUNDING: i32>(
40287 a: __m128,
40288 k: __mmask8,
40289 b: __m128,
40290 c: __m128,
40291) -> __m128 {
40292 unsafe {
40293 static_assert_rounding!(ROUNDING);
40294 let mut fnmadd: f32 = simd_extract!(a, 0);
40295 if (k & 0b00000001) != 0 {
40296 let extracta: f32 = -fnmadd;
40297 let extractb: f32 = simd_extract!(b, 0);
40298 let extractc: f32 = simd_extract!(c, 0);
40299 fnmadd = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
40300 }
40301 simd_insert!(a, 0, fnmadd)
40302 }
40303}
40304
40305/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40306///
40307/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40308/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40309/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40310/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40311/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40312/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40313///
40314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_ss&expand=2742)
40315#[inline]
40316#[target_feature(enable = "avx512f")]
40317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40318#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40319#[rustc_legacy_const_generics(4)]
40320pub fn _mm_maskz_fnmadd_round_ss<const ROUNDING: i32>(
40321 k: __mmask8,
40322 a: __m128,
40323 b: __m128,
40324 c: __m128,
40325) -> __m128 {
40326 unsafe {
40327 static_assert_rounding!(ROUNDING);
40328 let mut fnmadd: f32 = 0.;
40329 if (k & 0b00000001) != 0 {
40330 let extracta: f32 = simd_extract!(a, 0);
40331 let extracta: f32 = -extracta;
40332 let extractb: f32 = simd_extract!(b, 0);
40333 let extractc: f32 = simd_extract!(c, 0);
40334 fnmadd = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
40335 }
40336 simd_insert!(a, 0, fnmadd)
40337 }
40338}
40339
40340/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40341///
40342/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40343/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40344/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40345/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40346/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40347/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40348///
40349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_ss&expand=2741)
40350#[inline]
40351#[target_feature(enable = "avx512f")]
40352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40353#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40354#[rustc_legacy_const_generics(4)]
40355pub fn _mm_mask3_fnmadd_round_ss<const ROUNDING: i32>(
40356 a: __m128,
40357 b: __m128,
40358 c: __m128,
40359 k: __mmask8,
40360) -> __m128 {
40361 unsafe {
40362 static_assert_rounding!(ROUNDING);
40363 let mut fnmadd: f32 = simd_extract!(c, 0);
40364 if (k & 0b00000001) != 0 {
40365 let extracta: f32 = simd_extract!(a, 0);
40366 let extracta: f32 = -extracta;
40367 let extractb: f32 = simd_extract!(b, 0);
40368 fnmadd = vfmaddssround(a:extracta, b:extractb, c:fnmadd, ROUNDING);
40369 }
40370 simd_insert!(c, 0, fnmadd)
40371 }
40372}
40373
40374/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40375///
40376/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40377/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40378/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40379/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40380/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40381/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40382///
40383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_sd&expand=2735)
40384#[inline]
40385#[target_feature(enable = "avx512f")]
40386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40387#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40388#[rustc_legacy_const_generics(3)]
40389pub fn _mm_fnmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40390 unsafe {
40391 static_assert_rounding!(ROUNDING);
40392 let extracta: f64 = simd_extract!(a, 0);
40393 let extracta: f64 = -extracta;
40394 let extractb: f64 = simd_extract!(b, 0);
40395 let extractc: f64 = simd_extract!(c, 0);
40396 let fnmadd: f64 = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
40397 simd_insert!(a, 0, fnmadd)
40398 }
40399}
40400
40401/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40402///
40403/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40404/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40405/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40406/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40407/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40408/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40409///
40410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_sd&expand=2736)
40411#[inline]
40412#[target_feature(enable = "avx512f")]
40413#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40414#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40415#[rustc_legacy_const_generics(4)]
40416pub fn _mm_mask_fnmadd_round_sd<const ROUNDING: i32>(
40417 a: __m128d,
40418 k: __mmask8,
40419 b: __m128d,
40420 c: __m128d,
40421) -> __m128d {
40422 unsafe {
40423 static_assert_rounding!(ROUNDING);
40424 let mut fnmadd: f64 = simd_extract!(a, 0);
40425 if (k & 0b00000001) != 0 {
40426 let extracta: f64 = -fnmadd;
40427 let extractb: f64 = simd_extract!(b, 0);
40428 let extractc: f64 = simd_extract!(c, 0);
40429 fnmadd = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
40430 }
40431 simd_insert!(a, 0, fnmadd)
40432 }
40433}
40434
40435/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40436///
40437/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40438/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40439/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40440/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40441/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40442/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40443///
40444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_sd&expand=2738)
40445#[inline]
40446#[target_feature(enable = "avx512f")]
40447#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40448#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40449#[rustc_legacy_const_generics(4)]
40450pub fn _mm_maskz_fnmadd_round_sd<const ROUNDING: i32>(
40451 k: __mmask8,
40452 a: __m128d,
40453 b: __m128d,
40454 c: __m128d,
40455) -> __m128d {
40456 unsafe {
40457 static_assert_rounding!(ROUNDING);
40458 let mut fnmadd: f64 = 0.;
40459 if (k & 0b00000001) != 0 {
40460 let extracta: f64 = simd_extract!(a, 0);
40461 let extracta: f64 = -extracta;
40462 let extractb: f64 = simd_extract!(b, 0);
40463 let extractc: f64 = simd_extract!(c, 0);
40464 fnmadd = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
40465 }
40466 simd_insert!(a, 0, fnmadd)
40467 }
40468}
40469
40470/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40471///
40472/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40473/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40474/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40475/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40476/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40477/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40478///
40479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_sd&expand=2737)
40480#[inline]
40481#[target_feature(enable = "avx512f")]
40482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40483#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40484#[rustc_legacy_const_generics(4)]
40485pub fn _mm_mask3_fnmadd_round_sd<const ROUNDING: i32>(
40486 a: __m128d,
40487 b: __m128d,
40488 c: __m128d,
40489 k: __mmask8,
40490) -> __m128d {
40491 unsafe {
40492 static_assert_rounding!(ROUNDING);
40493 let mut fnmadd: f64 = simd_extract!(c, 0);
40494 if (k & 0b00000001) != 0 {
40495 let extracta: f64 = simd_extract!(a, 0);
40496 let extracta: f64 = -extracta;
40497 let extractb: f64 = simd_extract!(b, 0);
40498 fnmadd = vfmaddsdround(a:extracta, b:extractb, c:fnmadd, ROUNDING);
40499 }
40500 simd_insert!(c, 0, fnmadd)
40501 }
40502}
40503
40504/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40505///
40506/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40507/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40508/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40509/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40510/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40511/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40512///
40513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_ss&expand=2787)
40514#[inline]
40515#[target_feature(enable = "avx512f")]
40516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40517#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40518#[rustc_legacy_const_generics(3)]
40519pub fn _mm_fnmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40520 unsafe {
40521 static_assert_rounding!(ROUNDING);
40522 let extracta: f32 = simd_extract!(a, 0);
40523 let extracta: f32 = -extracta;
40524 let extractb: f32 = simd_extract!(b, 0);
40525 let extractc: f32 = simd_extract!(c, 0);
40526 let extractc: f32 = -extractc;
40527 let fnmsub: f32 = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
40528 simd_insert!(a, 0, fnmsub)
40529 }
40530}
40531
40532/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40533///
40534/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40535/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40536/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40537/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40538/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40539/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40540///
40541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_ss&expand=2788)
40542#[inline]
40543#[target_feature(enable = "avx512f")]
40544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40545#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40546#[rustc_legacy_const_generics(4)]
40547pub fn _mm_mask_fnmsub_round_ss<const ROUNDING: i32>(
40548 a: __m128,
40549 k: __mmask8,
40550 b: __m128,
40551 c: __m128,
40552) -> __m128 {
40553 unsafe {
40554 static_assert_rounding!(ROUNDING);
40555 let mut fnmsub: f32 = simd_extract!(a, 0);
40556 if (k & 0b00000001) != 0 {
40557 let extracta: f32 = -fnmsub;
40558 let extractb: f32 = simd_extract!(b, 0);
40559 let extractc: f32 = simd_extract!(c, 0);
40560 let extractc: f32 = -extractc;
40561 fnmsub = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
40562 }
40563 simd_insert!(a, 0, fnmsub)
40564 }
40565}
40566
40567/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40568///
40569/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40570/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40571/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40572/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40573/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40574/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40575///
40576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_ss&expand=2790)
40577#[inline]
40578#[target_feature(enable = "avx512f")]
40579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40580#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40581#[rustc_legacy_const_generics(4)]
40582pub fn _mm_maskz_fnmsub_round_ss<const ROUNDING: i32>(
40583 k: __mmask8,
40584 a: __m128,
40585 b: __m128,
40586 c: __m128,
40587) -> __m128 {
40588 unsafe {
40589 static_assert_rounding!(ROUNDING);
40590 let mut fnmsub: f32 = 0.;
40591 if (k & 0b00000001) != 0 {
40592 let extracta: f32 = simd_extract!(a, 0);
40593 let extracta: f32 = -extracta;
40594 let extractb: f32 = simd_extract!(b, 0);
40595 let extractc: f32 = simd_extract!(c, 0);
40596 let extractc: f32 = -extractc;
40597 fnmsub = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
40598 }
40599 simd_insert!(a, 0, fnmsub)
40600 }
40601}
40602
40603/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40604///
40605/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40606/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40607/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40608/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40609/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40610/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40611///
40612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_ss&expand=2789)
40613#[inline]
40614#[target_feature(enable = "avx512f")]
40615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40616#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40617#[rustc_legacy_const_generics(4)]
40618pub fn _mm_mask3_fnmsub_round_ss<const ROUNDING: i32>(
40619 a: __m128,
40620 b: __m128,
40621 c: __m128,
40622 k: __mmask8,
40623) -> __m128 {
40624 unsafe {
40625 static_assert_rounding!(ROUNDING);
40626 let mut fnmsub: f32 = simd_extract!(c, 0);
40627 if (k & 0b00000001) != 0 {
40628 let extracta: f32 = simd_extract!(a, 0);
40629 let extracta: f32 = -extracta;
40630 let extractb: f32 = simd_extract!(b, 0);
40631 let extractc: f32 = -fnmsub;
40632 fnmsub = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
40633 }
40634 simd_insert!(c, 0, fnmsub)
40635 }
40636}
40637
40638/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40639///
40640/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40641/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40642/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40643/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40644/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40645/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40646///
40647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_sd&expand=2783)
40648#[inline]
40649#[target_feature(enable = "avx512f")]
40650#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40651#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40652#[rustc_legacy_const_generics(3)]
40653pub fn _mm_fnmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40654 unsafe {
40655 static_assert_rounding!(ROUNDING);
40656 let extracta: f64 = simd_extract!(a, 0);
40657 let extracta: f64 = -extracta;
40658 let extractb: f64 = simd_extract!(b, 0);
40659 let extractc: f64 = simd_extract!(c, 0);
40660 let extractc: f64 = -extractc;
40661 let fnmsub: f64 = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
40662 simd_insert!(a, 0, fnmsub)
40663 }
40664}
40665
40666/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40667///
40668/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40669/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40670/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40671/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40672/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40673/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40674///
40675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_sd&expand=2784)
40676#[inline]
40677#[target_feature(enable = "avx512f")]
40678#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40679#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40680#[rustc_legacy_const_generics(4)]
40681pub fn _mm_mask_fnmsub_round_sd<const ROUNDING: i32>(
40682 a: __m128d,
40683 k: __mmask8,
40684 b: __m128d,
40685 c: __m128d,
40686) -> __m128d {
40687 unsafe {
40688 static_assert_rounding!(ROUNDING);
40689 let mut fnmsub: f64 = simd_extract!(a, 0);
40690 if (k & 0b00000001) != 0 {
40691 let extracta: f64 = -fnmsub;
40692 let extractb: f64 = simd_extract!(b, 0);
40693 let extractc: f64 = simd_extract!(c, 0);
40694 let extractc: f64 = -extractc;
40695 fnmsub = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
40696 }
40697 simd_insert!(a, 0, fnmsub)
40698 }
40699}
40700
40701/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40702///
40703/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40704/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40705/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40706/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40707/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40708/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40709///
40710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_sd&expand=2786)
40711#[inline]
40712#[target_feature(enable = "avx512f")]
40713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40714#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40715#[rustc_legacy_const_generics(4)]
40716pub fn _mm_maskz_fnmsub_round_sd<const ROUNDING: i32>(
40717 k: __mmask8,
40718 a: __m128d,
40719 b: __m128d,
40720 c: __m128d,
40721) -> __m128d {
40722 unsafe {
40723 static_assert_rounding!(ROUNDING);
40724 let mut fnmsub: f64 = 0.;
40725 if (k & 0b00000001) != 0 {
40726 let extracta: f64 = simd_extract!(a, 0);
40727 let extracta: f64 = -extracta;
40728 let extractb: f64 = simd_extract!(b, 0);
40729 let extractc: f64 = simd_extract!(c, 0);
40730 let extractc: f64 = -extractc;
40731 fnmsub = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
40732 }
40733 simd_insert!(a, 0, fnmsub)
40734 }
40735}
40736
40737/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40738///
40739/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40740/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40741/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40742/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40743/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40744/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40745///
40746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_sd&expand=2785)
40747#[inline]
40748#[target_feature(enable = "avx512f")]
40749#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40750#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40751#[rustc_legacy_const_generics(4)]
40752pub fn _mm_mask3_fnmsub_round_sd<const ROUNDING: i32>(
40753 a: __m128d,
40754 b: __m128d,
40755 c: __m128d,
40756 k: __mmask8,
40757) -> __m128d {
40758 unsafe {
40759 static_assert_rounding!(ROUNDING);
40760 let mut fnmsub: f64 = simd_extract!(c, 0);
40761 if (k & 0b00000001) != 0 {
40762 let extracta: f64 = simd_extract!(a, 0);
40763 let extracta: f64 = -extracta;
40764 let extractb: f64 = simd_extract!(b, 0);
40765 let extractc: f64 = -fnmsub;
40766 fnmsub = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
40767 }
40768 simd_insert!(c, 0, fnmsub)
40769 }
40770}
40771
40772/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40773///
40774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_ss&expand=2517)
40775#[inline]
40776#[target_feature(enable = "avx512f")]
40777#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40778#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40779#[rustc_legacy_const_generics(3)]
40780pub fn _mm_fixupimm_ss<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
40781 unsafe {
40782 static_assert_uimm_bits!(IMM8, 8);
40783 let a: f32x4 = a.as_f32x4();
40784 let b: f32x4 = b.as_f32x4();
40785 let c: i32x4 = c.as_i32x4();
40786 let r: f32x4 = vfixupimmss(a, b, c, IMM8, mask:0b11111111, _MM_FROUND_CUR_DIRECTION);
40787 let fixupimm: f32 = simd_extract!(r, 0);
40788 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40789 transmute(src:r)
40790 }
40791}
40792
40793/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40794///
40795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_ss&expand=2518)
40796#[inline]
40797#[target_feature(enable = "avx512f")]
40798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40799#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40800#[rustc_legacy_const_generics(4)]
40801pub fn _mm_mask_fixupimm_ss<const IMM8: i32>(
40802 a: __m128,
40803 k: __mmask8,
40804 b: __m128,
40805 c: __m128i,
40806) -> __m128 {
40807 unsafe {
40808 static_assert_uimm_bits!(IMM8, 8);
40809 let a: f32x4 = a.as_f32x4();
40810 let b: f32x4 = b.as_f32x4();
40811 let c: i32x4 = c.as_i32x4();
40812 let fixupimm: f32x4 = vfixupimmss(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
40813 let fixupimm: f32 = simd_extract!(fixupimm, 0);
40814 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40815 transmute(src:r)
40816 }
40817}
40818
40819/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40820///
40821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_ss&expand=2519)
40822#[inline]
40823#[target_feature(enable = "avx512f")]
40824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40825#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40826#[rustc_legacy_const_generics(4)]
40827pub fn _mm_maskz_fixupimm_ss<const IMM8: i32>(
40828 k: __mmask8,
40829 a: __m128,
40830 b: __m128,
40831 c: __m128i,
40832) -> __m128 {
40833 unsafe {
40834 static_assert_uimm_bits!(IMM8, 8);
40835 let a: f32x4 = a.as_f32x4();
40836 let b: f32x4 = b.as_f32x4();
40837 let c: i32x4 = c.as_i32x4();
40838 let fixupimm: f32x4 = vfixupimmssz(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
40839 let fixupimm: f32 = simd_extract!(fixupimm, 0);
40840 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40841 transmute(src:r)
40842 }
40843}
40844
40845/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40846///
40847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_sd&expand=2514)
40848#[inline]
40849#[target_feature(enable = "avx512f")]
40850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40851#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40852#[rustc_legacy_const_generics(3)]
40853pub fn _mm_fixupimm_sd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
40854 unsafe {
40855 static_assert_uimm_bits!(IMM8, 8);
40856 let a: f64x2 = a.as_f64x2();
40857 let b: f64x2 = b.as_f64x2();
40858 let c: i64x2 = c.as_i64x2();
40859 let fixupimm: f64x2 = vfixupimmsd(a, b, c, IMM8, mask:0b11111111, _MM_FROUND_CUR_DIRECTION);
40860 let fixupimm: f64 = simd_extract!(fixupimm, 0);
40861 let r: f64x2 = simd_insert!(a, 0, fixupimm);
40862 transmute(src:r)
40863 }
40864}
40865
40866/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40867///
40868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_sd&expand=2515)
40869#[inline]
40870#[target_feature(enable = "avx512f")]
40871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40872#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40873#[rustc_legacy_const_generics(4)]
40874pub fn _mm_mask_fixupimm_sd<const IMM8: i32>(
40875 a: __m128d,
40876 k: __mmask8,
40877 b: __m128d,
40878 c: __m128i,
40879) -> __m128d {
40880 unsafe {
40881 static_assert_uimm_bits!(IMM8, 8);
40882 let a: f64x2 = a.as_f64x2();
40883 let b: f64x2 = b.as_f64x2();
40884 let c: i64x2 = c.as_i64x2();
40885 let fixupimm: f64x2 = vfixupimmsd(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
40886 let fixupimm: f64 = simd_extract!(fixupimm, 0);
40887 let r: f64x2 = simd_insert!(a, 0, fixupimm);
40888 transmute(src:r)
40889 }
40890}
40891
40892/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40893///
40894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_sd&expand=2516)
40895#[inline]
40896#[target_feature(enable = "avx512f")]
40897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40898#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40899#[rustc_legacy_const_generics(4)]
40900pub fn _mm_maskz_fixupimm_sd<const IMM8: i32>(
40901 k: __mmask8,
40902 a: __m128d,
40903 b: __m128d,
40904 c: __m128i,
40905) -> __m128d {
40906 unsafe {
40907 static_assert_uimm_bits!(IMM8, 8);
40908 let a: f64x2 = a.as_f64x2();
40909 let b: f64x2 = b.as_f64x2();
40910 let c: i64x2 = c.as_i64x2();
40911 let fixupimm: f64x2 = vfixupimmsdz(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
40912 let fixupimm: f64 = simd_extract!(fixupimm, 0);
40913 let r: f64x2 = simd_insert!(a, 0, fixupimm);
40914 transmute(src:r)
40915 }
40916}
40917
40918/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40919/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40920///
40921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_ss&expand=2511)
40922#[inline]
40923#[target_feature(enable = "avx512f")]
40924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40925#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40926#[rustc_legacy_const_generics(3, 4)]
40927pub fn _mm_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40928 a: __m128,
40929 b: __m128,
40930 c: __m128i,
40931) -> __m128 {
40932 unsafe {
40933 static_assert_uimm_bits!(IMM8, 8);
40934 static_assert_mantissas_sae!(SAE);
40935 let a: f32x4 = a.as_f32x4();
40936 let b: f32x4 = b.as_f32x4();
40937 let c: i32x4 = c.as_i32x4();
40938 let r: f32x4 = vfixupimmss(a, b, c, IMM8, mask:0b11111111, SAE);
40939 let fixupimm: f32 = simd_extract!(r, 0);
40940 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40941 transmute(src:r)
40942 }
40943}
40944
40945/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40946/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40947///
40948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_ss&expand=2512)
40949#[inline]
40950#[target_feature(enable = "avx512f")]
40951#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40952#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40953#[rustc_legacy_const_generics(4, 5)]
40954pub fn _mm_mask_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40955 a: __m128,
40956 k: __mmask8,
40957 b: __m128,
40958 c: __m128i,
40959) -> __m128 {
40960 unsafe {
40961 static_assert_uimm_bits!(IMM8, 8);
40962 static_assert_mantissas_sae!(SAE);
40963 let a: f32x4 = a.as_f32x4();
40964 let b: f32x4 = b.as_f32x4();
40965 let c: i32x4 = c.as_i32x4();
40966 let r: f32x4 = vfixupimmss(a, b, c, IMM8, mask:k, SAE);
40967 let fixupimm: f32 = simd_extract!(r, 0);
40968 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40969 transmute(src:r)
40970 }
40971}
40972
40973/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40974/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40975///
40976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_ss&expand=2513)
40977#[inline]
40978#[target_feature(enable = "avx512f")]
40979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40980#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40981#[rustc_legacy_const_generics(4, 5)]
40982pub fn _mm_maskz_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40983 k: __mmask8,
40984 a: __m128,
40985 b: __m128,
40986 c: __m128i,
40987) -> __m128 {
40988 unsafe {
40989 static_assert_uimm_bits!(IMM8, 8);
40990 static_assert_mantissas_sae!(SAE);
40991 let a: f32x4 = a.as_f32x4();
40992 let b: f32x4 = b.as_f32x4();
40993 let c: i32x4 = c.as_i32x4();
40994 let r: f32x4 = vfixupimmssz(a, b, c, IMM8, mask:k, SAE);
40995 let fixupimm: f32 = simd_extract!(r, 0);
40996 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40997 transmute(src:r)
40998 }
40999}
41000
41001/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
41002/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41003///
41004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_sd&expand=2508)
41005#[inline]
41006#[target_feature(enable = "avx512f")]
41007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41008#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
41009#[rustc_legacy_const_generics(3, 4)]
41010pub fn _mm_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
41011 a: __m128d,
41012 b: __m128d,
41013 c: __m128i,
41014) -> __m128d {
41015 unsafe {
41016 static_assert_uimm_bits!(IMM8, 8);
41017 static_assert_mantissas_sae!(SAE);
41018 let a: f64x2 = a.as_f64x2();
41019 let b: f64x2 = b.as_f64x2();
41020 let c: i64x2 = c.as_i64x2();
41021 let r: f64x2 = vfixupimmsd(a, b, c, IMM8, mask:0b11111111, SAE);
41022 let fixupimm: f64 = simd_extract!(r, 0);
41023 let r: f64x2 = simd_insert!(a, 0, fixupimm);
41024 transmute(src:r)
41025 }
41026}
41027
41028/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
41029/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41030///
41031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_sd&expand=2509)
41032#[inline]
41033#[target_feature(enable = "avx512f")]
41034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41035#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
41036#[rustc_legacy_const_generics(4, 5)]
41037pub fn _mm_mask_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
41038 a: __m128d,
41039 k: __mmask8,
41040 b: __m128d,
41041 c: __m128i,
41042) -> __m128d {
41043 unsafe {
41044 static_assert_uimm_bits!(IMM8, 8);
41045 static_assert_mantissas_sae!(SAE);
41046 let a: f64x2 = a.as_f64x2();
41047 let b: f64x2 = b.as_f64x2();
41048 let c: i64x2 = c.as_i64x2();
41049 let r: f64x2 = vfixupimmsd(a, b, c, IMM8, mask:k, SAE);
41050 let fixupimm: f64 = simd_extract!(r, 0);
41051 let r: f64x2 = simd_insert!(a, 0, fixupimm);
41052 transmute(src:r)
41053 }
41054}
41055
41056/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
41057/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41058///
41059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_sd&expand=2510)
41060#[inline]
41061#[target_feature(enable = "avx512f")]
41062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41063#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
41064#[rustc_legacy_const_generics(4, 5)]
41065pub fn _mm_maskz_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
41066 k: __mmask8,
41067 a: __m128d,
41068 b: __m128d,
41069 c: __m128i,
41070) -> __m128d {
41071 unsafe {
41072 static_assert_uimm_bits!(IMM8, 8);
41073 static_assert_mantissas_sae!(SAE);
41074 let a: f64x2 = a.as_f64x2();
41075 let b: f64x2 = b.as_f64x2();
41076 let c: i64x2 = c.as_i64x2();
41077 let r: f64x2 = vfixupimmsdz(a, b, c, IMM8, mask:k, SAE);
41078 let fixupimm: f64 = simd_extract!(r, 0);
41079 let r: f64x2 = simd_insert!(a, 0, fixupimm);
41080 transmute(src:r)
41081 }
41082}
41083
41084/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
41085///
41086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtss_sd&expand=1896)
41087#[inline]
41088#[target_feature(enable = "avx512f")]
41089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41090#[cfg_attr(test, assert_instr(vcvtss2sd))]
41091pub fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41092 unsafe {
41093 transmute(src:vcvtss2sd(
41094 a.as_f64x2(),
41095 b.as_f32x4(),
41096 src.as_f64x2(),
41097 mask:k,
41098 _MM_FROUND_CUR_DIRECTION,
41099 ))
41100 }
41101}
41102
41103/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
41104///
41105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtss_sd&expand=1897)
41106#[inline]
41107#[target_feature(enable = "avx512f")]
41108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41109#[cfg_attr(test, assert_instr(vcvtss2sd))]
41110pub fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41111 unsafe {
41112 transmute(src:vcvtss2sd(
41113 a.as_f64x2(),
41114 b.as_f32x4(),
41115 src:f64x2::ZERO,
41116 mask:k,
41117 _MM_FROUND_CUR_DIRECTION,
41118 ))
41119 }
41120}
41121
41122/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
41123///
41124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtsd_ss&expand=1797)
41125#[inline]
41126#[target_feature(enable = "avx512f")]
41127#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41128#[cfg_attr(test, assert_instr(vcvtsd2ss))]
41129pub fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41130 unsafe {
41131 transmute(src:vcvtsd2ss(
41132 a.as_f32x4(),
41133 b.as_f64x2(),
41134 src.as_f32x4(),
41135 mask:k,
41136 _MM_FROUND_CUR_DIRECTION,
41137 ))
41138 }
41139}
41140
41141/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
41142///
41143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtsd_ss&expand=1798)
41144#[inline]
41145#[target_feature(enable = "avx512f")]
41146#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41147#[cfg_attr(test, assert_instr(vcvtsd2ss))]
41148pub fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41149 unsafe {
41150 transmute(src:vcvtsd2ss(
41151 a.as_f32x4(),
41152 b.as_f64x2(),
41153 src:f32x4::ZERO,
41154 mask:k,
41155 _MM_FROUND_CUR_DIRECTION,
41156 ))
41157 }
41158}
41159
41160/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41161/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41162///
41163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_sd&expand=1371)
41164#[inline]
41165#[target_feature(enable = "avx512f")]
41166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41167#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41168#[rustc_legacy_const_generics(2)]
41169pub fn _mm_cvt_roundss_sd<const SAE: i32>(a: __m128d, b: __m128) -> __m128d {
41170 unsafe {
41171 static_assert_sae!(SAE);
41172 let a: f64x2 = a.as_f64x2();
41173 let b: f32x4 = b.as_f32x4();
41174 let r: f64x2 = vcvtss2sd(a, b, src:f64x2::ZERO, mask:0b11111111, SAE);
41175 transmute(src:r)
41176 }
41177}
41178
41179/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41180/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41181///
41182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundss_sd&expand=1372)
41183#[inline]
41184#[target_feature(enable = "avx512f")]
41185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41186#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41187#[rustc_legacy_const_generics(4)]
41188pub fn _mm_mask_cvt_roundss_sd<const SAE: i32>(
41189 src: __m128d,
41190 k: __mmask8,
41191 a: __m128d,
41192 b: __m128,
41193) -> __m128d {
41194 unsafe {
41195 static_assert_sae!(SAE);
41196 let a: f64x2 = a.as_f64x2();
41197 let b: f32x4 = b.as_f32x4();
41198 let src: f64x2 = src.as_f64x2();
41199 let r: f64x2 = vcvtss2sd(a, b, src, mask:k, SAE);
41200 transmute(src:r)
41201 }
41202}
41203
41204/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41205/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41206///
41207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundss_sd&expand=1373)
41208#[inline]
41209#[target_feature(enable = "avx512f")]
41210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41211#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41212#[rustc_legacy_const_generics(3)]
41213pub fn _mm_maskz_cvt_roundss_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41214 unsafe {
41215 static_assert_sae!(SAE);
41216 let a: f64x2 = a.as_f64x2();
41217 let b: f32x4 = b.as_f32x4();
41218 let r: f64x2 = vcvtss2sd(a, b, src:f64x2::ZERO, mask:k, SAE);
41219 transmute(src:r)
41220 }
41221}
41222
41223/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41224/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41225/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41226/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41227/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41228/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41229/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41230///
41231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_ss&expand=1361)
41232#[inline]
41233#[target_feature(enable = "avx512f")]
41234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41235#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41236#[rustc_legacy_const_generics(2)]
41237pub fn _mm_cvt_roundsd_ss<const ROUNDING: i32>(a: __m128, b: __m128d) -> __m128 {
41238 unsafe {
41239 static_assert_rounding!(ROUNDING);
41240 let a: f32x4 = a.as_f32x4();
41241 let b: f64x2 = b.as_f64x2();
41242 let r: f32x4 = vcvtsd2ss(a, b, src:f32x4::ZERO, mask:0b11111111, ROUNDING);
41243 transmute(src:r)
41244 }
41245}
41246
41247/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41248/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41249/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41250/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41251/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41252/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41253/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41254///
41255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundsd_ss&expand=1362)
41256#[inline]
41257#[target_feature(enable = "avx512f")]
41258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41259#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41260#[rustc_legacy_const_generics(4)]
41261pub fn _mm_mask_cvt_roundsd_ss<const ROUNDING: i32>(
41262 src: __m128,
41263 k: __mmask8,
41264 a: __m128,
41265 b: __m128d,
41266) -> __m128 {
41267 unsafe {
41268 static_assert_rounding!(ROUNDING);
41269 let a: f32x4 = a.as_f32x4();
41270 let b: f64x2 = b.as_f64x2();
41271 let src: f32x4 = src.as_f32x4();
41272 let r: f32x4 = vcvtsd2ss(a, b, src, mask:k, ROUNDING);
41273 transmute(src:r)
41274 }
41275}
41276
41277/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41278/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41279/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41280/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41281/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41282/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41283/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41284///
41285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundsd_ss&expand=1363)
41286#[inline]
41287#[target_feature(enable = "avx512f")]
41288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41289#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41290#[rustc_legacy_const_generics(3)]
41291pub fn _mm_maskz_cvt_roundsd_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41292 unsafe {
41293 static_assert_rounding!(ROUNDING);
41294 let a: f32x4 = a.as_f32x4();
41295 let b: f64x2 = b.as_f64x2();
        let r: f32x4 = vcvtsd2ss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
41298 }
41299}
41300
41301/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41302/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41303/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41304/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41305/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41306/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41307/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41308///
41309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_si32&expand=1374)
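///
/// A minimal usage sketch (assumes AVX-512F support; `floor_to_i32` is only an
/// illustrative wrapper): the conversion rounds down regardless of the current
/// `MXCSR` rounding mode.
///
/// ```
/// # #![allow(dead_code)]
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn floor_to_i32(a: __m128) -> i32 {
///     // Round the low f32 lane toward negative infinity, exceptions suppressed.
///     _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a)
/// }
/// # }
/// ```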
41310#[inline]
41311#[target_feature(enable = "avx512f")]
41312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41313#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
41314#[rustc_legacy_const_generics(1)]
41315pub fn _mm_cvt_roundss_si32<const ROUNDING: i32>(a: __m128) -> i32 {
41316 unsafe {
41317 static_assert_rounding!(ROUNDING);
41318 let a: f32x4 = a.as_f32x4();
41319 vcvtss2si(a, ROUNDING)
41320 }
41321}
41322
41323/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41324/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41325/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41326/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41327/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41328/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41329/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41330///
41331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_i32&expand=1369)
41332#[inline]
41333#[target_feature(enable = "avx512f")]
41334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41335#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
41336#[rustc_legacy_const_generics(1)]
41337pub fn _mm_cvt_roundss_i32<const ROUNDING: i32>(a: __m128) -> i32 {
41338 unsafe {
41339 static_assert_rounding!(ROUNDING);
41340 let a: f32x4 = a.as_f32x4();
41341 vcvtss2si(a, ROUNDING)
41342 }
41343}
41344
41345/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
41346/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41347/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41348/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41349/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41350/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41351/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41352///
41353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_u32&expand=1376)
41354#[inline]
41355#[target_feature(enable = "avx512f")]
41356#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41357#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
41358#[rustc_legacy_const_generics(1)]
41359pub fn _mm_cvt_roundss_u32<const ROUNDING: i32>(a: __m128) -> u32 {
41360 unsafe {
41361 static_assert_rounding!(ROUNDING);
41362 let a: f32x4 = a.as_f32x4();
41363 vcvtss2usi(a, ROUNDING)
41364 }
41365}
41366
41367/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
41368///
41369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_i32&expand=1893)
41370#[inline]
41371#[target_feature(enable = "avx512f")]
41372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41373#[cfg_attr(test, assert_instr(vcvtss2si))]
41374pub fn _mm_cvtss_i32(a: __m128) -> i32 {
41375 unsafe { vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41376}
41377
41378/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
41379///
41380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_u32&expand=1901)
41381#[inline]
41382#[target_feature(enable = "avx512f")]
41383#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41384#[cfg_attr(test, assert_instr(vcvtss2usi))]
41385pub fn _mm_cvtss_u32(a: __m128) -> u32 {
41386 unsafe { vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41387}
41388
41389/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41390/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41391/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41392/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41393/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41394/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41395/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41396///
41397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_si32&expand=1359)
41398#[inline]
41399#[target_feature(enable = "avx512f")]
41400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41401#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
41402#[rustc_legacy_const_generics(1)]
41403pub fn _mm_cvt_roundsd_si32<const ROUNDING: i32>(a: __m128d) -> i32 {
41404 unsafe {
41405 static_assert_rounding!(ROUNDING);
41406 let a: f64x2 = a.as_f64x2();
41407 vcvtsd2si(a, ROUNDING)
41408 }
41409}
41410
/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41412/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41413/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41414/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41415/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41416/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41417/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41418///
41419/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_i32&expand=1357)
41420#[inline]
41421#[target_feature(enable = "avx512f")]
41422#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41423#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
41424#[rustc_legacy_const_generics(1)]
41425pub fn _mm_cvt_roundsd_i32<const ROUNDING: i32>(a: __m128d) -> i32 {
41426 unsafe {
41427 static_assert_rounding!(ROUNDING);
41428 let a: f64x2 = a.as_f64x2();
41429 vcvtsd2si(a, ROUNDING)
41430 }
41431}
41432
41433/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
41434/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41435/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41436/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41437/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41438/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41439/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41440///
41441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_u32&expand=1364)
41442#[inline]
41443#[target_feature(enable = "avx512f")]
41444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41445#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
41446#[rustc_legacy_const_generics(1)]
41447pub fn _mm_cvt_roundsd_u32<const ROUNDING: i32>(a: __m128d) -> u32 {
41448 unsafe {
41449 static_assert_rounding!(ROUNDING);
41450 let a: f64x2 = a.as_f64x2();
41451 vcvtsd2usi(a, ROUNDING)
41452 }
41453}
41454
41455/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
41456///
41457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_i32&expand=1791)
41458#[inline]
41459#[target_feature(enable = "avx512f")]
41460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41461#[cfg_attr(test, assert_instr(vcvtsd2si))]
41462pub fn _mm_cvtsd_i32(a: __m128d) -> i32 {
41463 unsafe { vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41464}
41465
41466/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
41467///
41468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_u32&expand=1799)
41469#[inline]
41470#[target_feature(enable = "avx512f")]
41471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41472#[cfg_attr(test, assert_instr(vcvtsd2usi))]
41473pub fn _mm_cvtsd_u32(a: __m128d) -> u32 {
41474 unsafe { vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41475}
41476
/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41480/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41481/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41482/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41483/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41484/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41485///
41486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundi32_ss&expand=1312)
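///
/// A minimal usage sketch (assumes AVX-512F support; `insert_i32_rounded_up` is only
/// an illustrative wrapper): the rounding mode only matters for integers whose
/// magnitude exceeds 2^24, i.e. values that are not exactly representable in `f32`.
///
/// ```
/// # #![allow(dead_code)]
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn insert_i32_rounded_up(a: __m128, b: i32) -> __m128 {
///     _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b)
/// }
/// # }
/// ```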
41487#[inline]
41488#[target_feature(enable = "avx512f")]
41489#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41490#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
41491#[rustc_legacy_const_generics(2)]
41492pub fn _mm_cvt_roundi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
41493 unsafe {
41494 static_assert_rounding!(ROUNDING);
41495 let a: f32x4 = a.as_f32x4();
41496 let r: f32x4 = vcvtsi2ss(a, b, ROUNDING);
        transmute(r)
41498 }
41499}
41500
/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41504/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41505/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41506/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41507/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41508/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41509///
41510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsi32_ss&expand=1366)
41511#[inline]
41512#[target_feature(enable = "avx512f")]
41513#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41514#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
41515#[rustc_legacy_const_generics(2)]
41516pub fn _mm_cvt_roundsi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
41517 unsafe {
41518 static_assert_rounding!(ROUNDING);
41519 let a: f32x4 = a.as_f32x4();
41520 let r: f32x4 = vcvtsi2ss(a, b, ROUNDING);
        transmute(r)
41522 }
41523}
41524
41525/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41526/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41527/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41528/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41529/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41530/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41531/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41532///
41533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundu32_ss&expand=1378)
41534#[inline]
41535#[target_feature(enable = "avx512f")]
41536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41537#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
41538#[rustc_legacy_const_generics(2)]
41539pub fn _mm_cvt_roundu32_ss<const ROUNDING: i32>(a: __m128, b: u32) -> __m128 {
41540 unsafe {
41541 static_assert_rounding!(ROUNDING);
41542 let a: f32x4 = a.as_f32x4();
41543 let r: f32x4 = vcvtusi2ss(a, b, ROUNDING);
        transmute(r)
41545 }
41546}
41547
41548/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
41549///
41550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_ss&expand=1643)
41551#[inline]
41552#[target_feature(enable = "avx512f")]
41553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41554#[cfg_attr(test, assert_instr(vcvtsi2ss))]
41555pub fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
41556 unsafe {
41557 let b: f32 = b as f32;
41558 simd_insert!(a, 0, b)
41559 }
41560}
41561
41562/// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
41563///
41564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_sd&expand=1642)
41565#[inline]
41566#[target_feature(enable = "avx512f")]
41567#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41568#[cfg_attr(test, assert_instr(vcvtsi2sd))]
41569pub fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
41570 unsafe {
41571 let b: f64 = b as f64;
41572 simd_insert!(a, 0, b)
41573 }
41574}
41575
41576/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41577/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41578///
41579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_si32&expand=1936)
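///
/// A minimal usage sketch (assumes AVX-512F support; `truncate_quiet` is only an
/// illustrative wrapper): truncation is implied by the intrinsic itself, so `SAE`
/// only controls whether floating-point exceptions are suppressed.
///
/// ```
/// # #![allow(dead_code)]
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn truncate_quiet(a: __m128) -> i32 {
///     _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a)
/// }
/// # }
/// ```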
41580#[inline]
41581#[target_feature(enable = "avx512f")]
41582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41583#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
41584#[rustc_legacy_const_generics(1)]
41585pub fn _mm_cvtt_roundss_si32<const SAE: i32>(a: __m128) -> i32 {
41586 unsafe {
41587 static_assert_sae!(SAE);
41588 let a: f32x4 = a.as_f32x4();
41589 vcvttss2si(a, SAE)
41590 }
41591}
41592
41593/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41594/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41595///
41596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_i32&expand=1934)
41597#[inline]
41598#[target_feature(enable = "avx512f")]
41599#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41600#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
41601#[rustc_legacy_const_generics(1)]
41602pub fn _mm_cvtt_roundss_i32<const SAE: i32>(a: __m128) -> i32 {
41603 unsafe {
41604 static_assert_sae!(SAE);
41605 let a: f32x4 = a.as_f32x4();
41606 vcvttss2si(a, SAE)
41607 }
41608}
41609
41610/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
41611/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41612///
41613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_u32&expand=1938)
41614#[inline]
41615#[target_feature(enable = "avx512f")]
41616#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41617#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))]
41618#[rustc_legacy_const_generics(1)]
41619pub fn _mm_cvtt_roundss_u32<const SAE: i32>(a: __m128) -> u32 {
41620 unsafe {
41621 static_assert_sae!(SAE);
41622 let a: f32x4 = a.as_f32x4();
41623 vcvttss2usi(a, SAE)
41624 }
41625}
41626
41627/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
41628///
41629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_i32&expand=2022)
41630#[inline]
41631#[target_feature(enable = "avx512f")]
41632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41633#[cfg_attr(test, assert_instr(vcvttss2si))]
41634pub fn _mm_cvttss_i32(a: __m128) -> i32 {
41635 unsafe { vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41636}
41637
41638/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
41639///
41640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_u32&expand=2026)
41641#[inline]
41642#[target_feature(enable = "avx512f")]
41643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41644#[cfg_attr(test, assert_instr(vcvttss2usi))]
41645pub fn _mm_cvttss_u32(a: __m128) -> u32 {
41646 unsafe { vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41647}
41648
41649/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41650/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41651///
41652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_si32&expand=1930)
41653#[inline]
41654#[target_feature(enable = "avx512f")]
41655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41656#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
41657#[rustc_legacy_const_generics(1)]
41658pub fn _mm_cvtt_roundsd_si32<const SAE: i32>(a: __m128d) -> i32 {
41659 unsafe {
41660 static_assert_sae!(SAE);
41661 let a: f64x2 = a.as_f64x2();
41662 vcvttsd2si(a, SAE)
41663 }
41664}
41665
41666/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41667/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41668///
41669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_i32&expand=1928)
41670#[inline]
41671#[target_feature(enable = "avx512f")]
41672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41673#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
41674#[rustc_legacy_const_generics(1)]
41675pub fn _mm_cvtt_roundsd_i32<const SAE: i32>(a: __m128d) -> i32 {
41676 unsafe {
41677 static_assert_sae!(SAE);
41678 let a: f64x2 = a.as_f64x2();
41679 vcvttsd2si(a, SAE)
41680 }
41681}
41682
41683/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
41684/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41685///
41686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundsd_u32&expand=1932)
41687#[inline]
41688#[target_feature(enable = "avx512f")]
41689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41690#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))]
41691#[rustc_legacy_const_generics(1)]
41692pub fn _mm_cvtt_roundsd_u32<const SAE: i32>(a: __m128d) -> u32 {
41693 unsafe {
41694 static_assert_sae!(SAE);
41695 let a: f64x2 = a.as_f64x2();
41696 vcvttsd2usi(a, SAE)
41697 }
41698}
41699
41700/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
41701///
41702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_i32&expand=2015)
41703#[inline]
41704#[target_feature(enable = "avx512f")]
41705#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41706#[cfg_attr(test, assert_instr(vcvttsd2si))]
41707pub fn _mm_cvttsd_i32(a: __m128d) -> i32 {
41708 unsafe { vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41709}
41710
41711/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
41712///
41713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_u32&expand=2020)
41714#[inline]
41715#[target_feature(enable = "avx512f")]
41716#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41717#[cfg_attr(test, assert_instr(vcvttsd2usi))]
41718pub fn _mm_cvttsd_u32(a: __m128d) -> u32 {
41719 unsafe { vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41720}
41721
41722/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
41723///
41724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_ss&expand=2032)
41725#[inline]
41726#[target_feature(enable = "avx512f")]
41727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41728#[cfg_attr(test, assert_instr(vcvtusi2ss))]
41729pub fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
41730 unsafe {
41731 let b: f32 = b as f32;
41732 simd_insert!(a, 0, b)
41733 }
41734}
41735
41736/// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
41737///
41738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_sd&expand=2031)
41739#[inline]
41740#[target_feature(enable = "avx512f")]
41741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41742#[cfg_attr(test, assert_instr(vcvtusi2sd))]
41743pub fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
41744 unsafe {
41745 let b: f64 = b as f64;
41746 simd_insert!(a, 0, b)
41747 }
41748}
41749
41750/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
41751/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41752///
41753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_ss&expand=1175)
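///
/// A minimal usage sketch (assumes AVX-512F support; `low_lane_lt` is only an
/// illustrative wrapper): the predicate is one of the `_CMP_*` constants and the
/// return value is 0 or 1.
///
/// ```
/// # #![allow(dead_code)]
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn low_lane_lt(a: __m128, b: __m128) -> bool {
///     // Ordered, non-signaling less-than on the low lanes, exceptions suppressed.
///     _mm_comi_round_ss::<_CMP_LT_OQ, _MM_FROUND_NO_EXC>(a, b) == 1
/// }
/// # }
/// ```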
41754#[inline]
41755#[target_feature(enable = "avx512f")]
41756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41757#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomiss
41758#[rustc_legacy_const_generics(2, 3)]
41759pub fn _mm_comi_round_ss<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> i32 {
41760 unsafe {
41761 static_assert_uimm_bits!(IMM5, 5);
41762 static_assert_mantissas_sae!(SAE);
41763 let a: f32x4 = a.as_f32x4();
41764 let b: f32x4 = b.as_f32x4();
41765 vcomiss(a, b, IMM5, SAE)
41766 }
41767}
41768
41769/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
41770/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41771///
41772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_sd&expand=1174)
41773#[inline]
41774#[target_feature(enable = "avx512f")]
41775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41776#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomisd
41777#[rustc_legacy_const_generics(2, 3)]
41778pub fn _mm_comi_round_sd<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> i32 {
41779 unsafe {
41780 static_assert_uimm_bits!(IMM5, 5);
41781 static_assert_mantissas_sae!(SAE);
41782 let a: f64x2 = a.as_f64x2();
41783 let b: f64x2 = b.as_f64x2();
41784 vcomisd(a, b, IMM5, SAE)
41785 }
41786}
41787
41788/// Equal
41789#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41790pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
41791/// Less-than
41792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41793pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
41794/// Less-than-or-equal
41795#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41796pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
41797/// False
41798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41799pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
41800/// Not-equal
41801#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41802pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
41803/// Not less-than
41804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41805pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
41806/// Not less-than-or-equal
41807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41808pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
41809/// True
41810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41811pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
41812
41813/// interval [1, 2)
41814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41815pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
41816/// interval [0.5, 2)
41817#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41818pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
41819/// interval [0.5, 1)
41820#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41821pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
41822/// interval [0.75, 1.5)
41823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41824pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;
41825
41826/// sign = sign(SRC)
41827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41828pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
41829/// sign = 0
41830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41831pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
41832/// DEST = NaN if sign(SRC) = 1
41833#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41834pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;
41835
41836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41837pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
41838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41839pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
41840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41841pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
41842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41843pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
41844#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41845pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
41846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41847pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
41848#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41849pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
41850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41851pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
41852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41853pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
41854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41855pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
41856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41857pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
41858#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41859pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
41860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41861pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
41862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41863pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
41864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41865pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
41866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41867pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
41868#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41869pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
41870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41871pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
41872#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41873pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
41874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41875pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
41876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41877pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
41878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41879pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
41880#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41881pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
41882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41883pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
41884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41885pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
41886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41887pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
41888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41889pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
41890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41891pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
41892#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41893pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
41894#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41895pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
41896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41897pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
41898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41899pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
41900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41901pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
41902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41903pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
41904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41905pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
41906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41907pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
41908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41909pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
41910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41911pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
41912#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41913pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
41914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41915pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
41916#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41917pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
41918#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41919pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
41920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41921pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
41922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41923pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
41924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41925pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
41926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41927pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
41928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41929pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
41930#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41931pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
41932#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41933pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
41934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41935pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
41936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41937pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
41938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41939pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
41940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41941pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
41942#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41943pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
41944#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41945pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
41946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41947pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
41948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41949pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
41950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41951pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
41952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41953pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
41954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41955pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
41956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41957pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
41958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41959pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
41960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41961pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
41962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41963pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
41964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41965pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
41966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41967pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
41968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41969pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
41970#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41971pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
41972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41973pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
41974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41975pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
41976#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41977pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
41978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41979pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
41980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41981pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
41982#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41983pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
41984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41985pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
41986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41987pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
41988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41989pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
41990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41991pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
41992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41993pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
41994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41995pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
41996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41997pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
41998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41999pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
42000#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42001pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
42002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42003pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
42004#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42005pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
42006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42007pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
42008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42009pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
42010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42011pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
42012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42013pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
42014#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42015pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
42016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42017pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
42018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42019pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
42020#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42021pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
42022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42023pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
42024#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42025pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
42026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42027pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
42028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42029pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
42030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42031pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
42032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42033pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
42034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42035pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
42036#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42037pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
42038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42039pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
42040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42041pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
42042#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42043pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
42044#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42045pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
42046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42047pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
42048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42049pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
42050#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42051pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
42052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42053pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
42054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42055pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
42056#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42057pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
42058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42059pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
42060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42061pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
42062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42063pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
42064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42065pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
42066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42067pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
42068#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42069pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
42070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42071pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
42072#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42073pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
42074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42075pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
42076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42077pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
42078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42079pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
42080#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42081pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
42082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42083pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
42084#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42085pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
42086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42087pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
42088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42089pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
42090#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42091pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
42092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42093pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
42094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42095pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
42096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42097pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
42098#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42099pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
42100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42101pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
42102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42103pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
42104#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42105pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
42106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42107pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
42108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42109pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
42110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42111pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
42112#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42113pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
42114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42115pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
42116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42117pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
42118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42119pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
42120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42121pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
42122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42123pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
42124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42125pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
42126#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42127pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
42128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42129pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
42130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42131pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
42132#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42133pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
42134#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42135pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
42136#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42137pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
42138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42139pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
42140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42141pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
42142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42143pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
42144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42145pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
42146#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42147pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
42148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42149pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
42150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42151pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
42152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42153pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
42154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42155pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
42156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42157pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
42158#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42159pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
42160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42161pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
42162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42163pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
42164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42165pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
42166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42167pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
42168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42169pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
42170#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42171pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
42172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42173pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
42174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42175pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
42176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42177pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
42178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42179pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
42180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42181pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
42182#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42183pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
42184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42185pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
42186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42187pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
42188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42189pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
42190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42191pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
42192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42193pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
42194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42195pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
42196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42197pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
42198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42199pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
42200#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42201pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
42202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42203pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
42204#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42205pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
42206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42207pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
42208#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42209pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
42210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42211pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
42212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42213pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
42214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42215pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
42216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42217pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
42218#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42219pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
42220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42221pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
42222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42223pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
42224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42225pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
42226#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42227pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
42228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42229pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
42230#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42231pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
42232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42233pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
42234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42235pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
42236#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42237pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
42238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42239pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
42240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42241pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
42242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42243pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
42244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42245pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
42246#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCAD: _MM_PERM_ENUM = 0xE3;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;

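// Illustrative note (not part of the upstream source): each `_MM_PERM_*`
// constant packs four 2-bit lane selectors, with `A` = 0 through `D` = 3 and
// the first letter occupying the most significant bit pair, so `_MM_PERM_DCBA`
// (0xE4) selects lanes 3, 2, 1, 0 and is the identity control for
// `_mm512_shuffle_epi32`. A minimal compile-time sketch of that encoding,
// assuming `_MM_PERM_ENUM` is a plain integer alias:
const _: () = assert!(_MM_PERM_DCBA == ((3 << 6) | (2 << 4) | (1 << 2) | 0));
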
#[allow(improper_ctypes)]
unsafe extern "C" {
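    // Reading guide for the declarations below (a descriptive note added here,
    // not upstream documentation): each item binds to an LLVM intrinsic through
    // `#[link_name]`, so the signatures mirror LLVM's definitions rather than
    // the public Intel API. By convention, `src` is the merge source of the
    // masked forms, `mask`/`m`/`k` is the per-lane write mask, and the trailing
    // `rounding`/`sae` immediates take the `_MM_FROUND_*` constants.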
    #[link_name = "llvm.x86.avx512.sqrt.ps.512"]
    unsafe fn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.sqrt.pd.512"]
    unsafe fn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;

    #[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
    unsafe fn vfmadd132psround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512;
    #[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
    unsafe fn vfmadd132pdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d;

    #[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
    unsafe fn vfmaddsubpsround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512; // from clang
    #[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
    unsafe fn vfmaddsubpdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d; // from clang

    #[link_name = "llvm.x86.avx512.add.ps.512"]
    unsafe fn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.add.pd.512"]
    unsafe fn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.sub.ps.512"]
    unsafe fn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.sub.pd.512"]
    unsafe fn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mul.ps.512"]
    unsafe fn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mul.pd.512"]
    unsafe fn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.div.ps.512"]
    unsafe fn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.div.pd.512"]
    unsafe fn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;

    #[link_name = "llvm.x86.avx512.max.ps.512"]
    unsafe fn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.max.pd.512"]
    unsafe fn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.min.ps.512"]
    unsafe fn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.min.pd.512"]
    unsafe fn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;

    #[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
    unsafe fn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;

    #[link_name = "llvm.x86.avx512.mask.getexp.ps.256"]
    unsafe fn vgetexpps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.getexp.ps.128"]
    unsafe fn vgetexpps128(a: f32x4, src: f32x4, m: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
    unsafe fn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.getexp.pd.256"]
    unsafe fn vgetexppd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.getexp.pd.128"]
    unsafe fn vgetexppd128(a: f64x2, src: f64x2, m: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"]
    unsafe fn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.256"]
    unsafe fn vrndscaleps256(a: f32x8, imm8: i32, src: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.128"]
    unsafe fn vrndscaleps128(a: f32x4, imm8: i32, src: f32x4, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"]
    unsafe fn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.256"]
    unsafe fn vrndscalepd256(a: f64x4, imm8: i32, src: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.128"]
    unsafe fn vrndscalepd128(a: f64x2, imm8: i32, src: f64x2, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.scalef.ps.512"]
    unsafe fn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.scalef.ps.256"]
    unsafe fn vscalefps256(a: f32x8, b: f32x8, src: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.scalef.ps.128"]
    unsafe fn vscalefps128(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.scalef.pd.512"]
    unsafe fn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.scalef.pd.256"]
    unsafe fn vscalefpd256(a: f64x4, b: f64x4, src: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.scalef.pd.128"]
    unsafe fn vscalefpd128(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"]
    unsafe fn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.256"]
    unsafe fn vfixupimmps256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.128"]
    unsafe fn vfixupimmps128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"]
    unsafe fn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.256"]
    unsafe fn vfixupimmpd256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.128"]
    unsafe fn vfixupimmpd128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"]
    unsafe fn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.256"]
    unsafe fn vfixupimmpsz256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.128"]
    unsafe fn vfixupimmpsz128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"]
    unsafe fn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.256"]
    unsafe fn vfixupimmpdz256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.128"]
    unsafe fn vfixupimmpdz128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.pternlog.d.512"]
    unsafe fn vpternlogd(a: i32x16, b: i32x16, c: i32x16, imm8: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.pternlog.d.256"]
    unsafe fn vpternlogd256(a: i32x8, b: i32x8, c: i32x8, imm8: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.pternlog.d.128"]
    unsafe fn vpternlogd128(a: i32x4, b: i32x4, c: i32x4, imm8: i32) -> i32x4;

    #[link_name = "llvm.x86.avx512.pternlog.q.512"]
    unsafe fn vpternlogq(a: i64x8, b: i64x8, c: i64x8, imm8: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.pternlog.q.256"]
    unsafe fn vpternlogq256(a: i64x4, b: i64x4, c: i64x4, imm8: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.pternlog.q.128"]
    unsafe fn vpternlogq128(a: i64x2, b: i64x2, c: i64x2, imm8: i32) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
    unsafe fn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.getmant.ps.256"]
    unsafe fn vgetmantps256(a: f32x8, mantissas: i32, src: f32x8, m: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.getmant.ps.128"]
    unsafe fn vgetmantps128(a: f32x4, mantissas: i32, src: f32x4, m: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
    unsafe fn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.getmant.pd.256"]
    unsafe fn vgetmantpd256(a: f64x4, mantissas: i32, src: f64x4, m: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.getmant.pd.128"]
    unsafe fn vgetmantpd128(a: f64x2, mantissas: i32, src: f64x2, m: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.rcp14.ps.512"]
    unsafe fn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.rcp14.ps.256"]
    unsafe fn vrcp14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.rcp14.ps.128"]
    unsafe fn vrcp14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.rcp14.pd.512"]
    unsafe fn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.rcp14.pd.256"]
    unsafe fn vrcp14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.rcp14.pd.128"]
    unsafe fn vrcp14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
    unsafe fn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.rsqrt14.ps.256"]
    unsafe fn vrsqrt14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.rsqrt14.ps.128"]
    unsafe fn vrsqrt14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
    unsafe fn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.rsqrt14.pd.256"]
    unsafe fn vrsqrt14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.rsqrt14.pd.128"]
    unsafe fn vrsqrt14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
    unsafe fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;

    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
    unsafe fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.256"]
    unsafe fn vcvtps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.128"]
    unsafe fn vcvtps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
    unsafe fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
    unsafe fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;

    #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
    unsafe fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;

    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
    unsafe fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.256"]
    unsafe fn vcvtpd2udq256(a: f64x4, src: u32x4, mask: u8) -> u32x4;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.128"]
    unsafe fn vcvtpd2udq128(a: f64x2, src: u32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
    unsafe fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
    unsafe fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;

    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
    unsafe fn vcvtps2ph(a: f32x16, rounding: i32, src: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"]
    unsafe fn vcvtps2ph256(a: f32x8, imm8: i32, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"]
    unsafe fn vcvtps2ph128(a: f32x4, imm8: i32, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
    unsafe fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;

    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
    unsafe fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"]
    unsafe fn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"]
    unsafe fn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
    unsafe fn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.256"]
    unsafe fn vcvttps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.128"]
    unsafe fn vcvttps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
    unsafe fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"]
    unsafe fn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"]
    unsafe fn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
    unsafe fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.256"]
    unsafe fn vcvttpd2udq256(a: f64x4, src: i32x4, mask: u8) -> u32x4;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.128"]
    unsafe fn vcvttpd2udq128(a: f64x2, src: i32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.mask.pmov.dw.128"]
    unsafe fn vpmovdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmov.db.256"]
    unsafe fn vpmovdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmov.db.128"]
    unsafe fn vpmovdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmov.qw.256"]
    unsafe fn vpmovqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmov.qw.128"]
    unsafe fn vpmovqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.256"]
    unsafe fn vpmovqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.128"]
    unsafe fn vpmovqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmov.qd.128"]
    unsafe fn vpmovqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.512"]
    unsafe fn vpmovdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.256"]
    unsafe fn vpmovdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.128"]
    unsafe fn vpmovdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.512"]
    unsafe fn vpmovsdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.256"]
    unsafe fn vpmovsdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.128"]
    unsafe fn vpmovsdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.512"]
    unsafe fn vpmovusdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.256"]
    unsafe fn vpmovusdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.128"]
    unsafe fn vpmovusdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.512"]
    unsafe fn vpmovdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.256"]
    unsafe fn vpmovdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.128"]
    unsafe fn vpmovdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.512"]
    unsafe fn vpmovsdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.256"]
    unsafe fn vpmovsdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.128"]
    unsafe fn vpmovsdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.512"]
    unsafe fn vpmovusdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.256"]
    unsafe fn vpmovusdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.128"]
    unsafe fn vpmovusdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.512"]
    unsafe fn vpmovqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.256"]
    unsafe fn vpmovqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.128"]
    unsafe fn vpmovqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.512"]
    unsafe fn vpmovsqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.256"]
    unsafe fn vpmovsqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.128"]
    unsafe fn vpmovsqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.512"]
    unsafe fn vpmovusqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.256"]
    unsafe fn vpmovusqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.128"]
    unsafe fn vpmovusqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.512"]
    unsafe fn vpmovqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.256"]
    unsafe fn vpmovqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.128"]
    unsafe fn vpmovqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.512"]
    unsafe fn vpmovsqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.256"]
    unsafe fn vpmovsqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.128"]
    unsafe fn vpmovsqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.512"]
    unsafe fn vpmovusqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.256"]
    unsafe fn vpmovusqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.128"]
    unsafe fn vpmovusqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.512"]
    unsafe fn vpmovqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.256"]
    unsafe fn vpmovqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.128"]
    unsafe fn vpmovqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.512"]
    unsafe fn vpmovsqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.256"]
    unsafe fn vpmovsqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.128"]
    unsafe fn vpmovsqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.512"]
    unsafe fn vpmovusqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.256"]
    unsafe fn vpmovusqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.128"]
    unsafe fn vpmovusqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
    unsafe fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
    unsafe fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.256"]
    unsafe fn vpmovsdw256(a: i32x8, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.128"]
    unsafe fn vpmovsdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
    unsafe fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.256"]
    unsafe fn vpmovsdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.128"]
    unsafe fn vpmovsdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
    unsafe fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.256"]
    unsafe fn vpmovsqd256(a: i64x4, src: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.128"]
    unsafe fn vpmovsqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
    unsafe fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.256"]
    unsafe fn vpmovsqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.128"]
    unsafe fn vpmovsqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
    unsafe fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.256"]
    unsafe fn vpmovsqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.128"]
    unsafe fn vpmovsqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
    unsafe fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.256"]
    unsafe fn vpmovusdw256(a: u32x8, src: u16x8, mask: u8) -> u16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.128"]
    unsafe fn vpmovusdw128(a: u32x4, src: u16x8, mask: u8) -> u16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
    unsafe fn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.256"]
    unsafe fn vpmovusdb256(a: u32x8, src: u8x16, mask: u8) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.128"]
    unsafe fn vpmovusdb128(a: u32x4, src: u8x16, mask: u8) -> u8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
    unsafe fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.256"]
    unsafe fn vpmovusqd256(a: u64x4, src: u32x4, mask: u8) -> u32x4;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.128"]
    unsafe fn vpmovusqd128(a: u64x2, src: u32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
    unsafe fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.256"]
    unsafe fn vpmovusqw256(a: u64x4, src: u16x8, mask: u8) -> u16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.128"]
    unsafe fn vpmovusqw128(a: u64x2, src: u16x8, mask: u8) -> u16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
    unsafe fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.256"]
    unsafe fn vpmovusqb256(a: u64x4, src: u8x16, mask: u8) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.128"]
    unsafe fn vpmovusqb128(a: u64x2, src: u8x16, mask: u8) -> u8x16;

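    // Descriptive note (added here, not upstream documentation): the gather
    // declarations below take the merge source, a base pointer, a vector of
    // signed offsets, a mask and a byte `scale` (1, 2, 4 or 8); the scatter
    // declarations take the destination pointer, the mask, the offsets, the
    // data to store and the `scale`, in that order.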
    #[link_name = "llvm.x86.avx512.gather.dpd.512"]
    unsafe fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.gather.dps.512"]
    unsafe fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.gather.qpd.512"]
    unsafe fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.gather.qps.512"]
    unsafe fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
    #[link_name = "llvm.x86.avx512.gather.dpq.512"]
    unsafe fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.gather.dpi.512"]
    unsafe fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.gather.qpq.512"]
    unsafe fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.gather.qpi.512"]
    unsafe fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;

    #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
    unsafe fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.dps.512"]
    unsafe fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
    unsafe fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qps.512"]
    unsafe fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
    unsafe fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);

    #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
    unsafe fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
    unsafe fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
    unsafe fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);

    #[link_name = "llvm.x86.avx512.scattersiv4.si"]
    unsafe fn vpscatterdd_128(slice: *mut i8, k: u8, offsets: i32x4, src: i32x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv2.di"]
    unsafe fn vpscatterdq_128(slice: *mut i8, k: u8, offsets: i32x4, src: i64x2, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv2.df"]
    unsafe fn vscatterdpd_128(slice: *mut i8, k: u8, offsets: i32x4, src: f64x2, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv4.sf"]
    unsafe fn vscatterdps_128(slice: *mut i8, k: u8, offsets: i32x4, src: f32x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv4.si"]
    unsafe fn vpscatterqd_128(slice: *mut i8, k: u8, offsets: i64x2, src: i32x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv2.di"]
    unsafe fn vpscatterqq_128(slice: *mut i8, k: u8, offsets: i64x2, src: i64x2, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv2.df"]
    unsafe fn vscatterqpd_128(slice: *mut i8, k: u8, offsets: i64x2, src: f64x2, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv4.sf"]
    unsafe fn vscatterqps_128(slice: *mut i8, k: u8, offsets: i64x2, src: f32x4, scale: i32);

    #[link_name = "llvm.x86.avx512.scattersiv8.si"]
    unsafe fn vpscatterdd_256(slice: *mut i8, k: u8, offsets: i32x8, src: i32x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv4.di"]
    unsafe fn vpscatterdq_256(slice: *mut i8, k: u8, offsets: i32x4, src: i64x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv4.df"]
    unsafe fn vscatterdpd_256(slice: *mut i8, k: u8, offsets: i32x4, src: f64x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv8.sf"]
    unsafe fn vscatterdps_256(slice: *mut i8, k: u8, offsets: i32x8, src: f32x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv8.si"]
    unsafe fn vpscatterqd_256(slice: *mut i8, k: u8, offsets: i64x4, src: i32x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv4.di"]
    unsafe fn vpscatterqq_256(slice: *mut i8, k: u8, offsets: i64x4, src: i64x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv4.df"]
    unsafe fn vscatterqpd_256(slice: *mut i8, k: u8, offsets: i64x4, src: f64x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv8.sf"]
    unsafe fn vscatterqps_256(slice: *mut i8, k: u8, offsets: i64x4, src: f32x4, scale: i32);

    #[link_name = "llvm.x86.avx512.gather3siv4.si"]
    unsafe fn vpgatherdd_128(src: i32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i32x4;
    #[link_name = "llvm.x86.avx512.gather3siv2.di"]
    unsafe fn vpgatherdq_128(src: i64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x2;
    #[link_name = "llvm.x86.avx512.gather3siv2.df"]
    unsafe fn vgatherdpd_128(src: f64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.gather3siv4.sf"]
    unsafe fn vgatherdps_128(src: f32x4, slice: *const u8, offsets: i32x4, k: u8, scale: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.gather3div4.si"]
    unsafe fn vpgatherqd_128(src: i32x4, slice: *const u8, offsets: i64x2, k: u8, scale: i32) -> i32x4;
    #[link_name = "llvm.x86.avx512.gather3div2.di"]
    unsafe fn vpgatherqq_128(src: i64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i64x2;
    #[link_name = "llvm.x86.avx512.gather3div2.df"]
    unsafe fn vgatherqpd_128(src: f64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.gather3div4.sf"]
    unsafe fn vgatherqps_128(src: f32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.gather3siv8.si"]
    unsafe fn vpgatherdd_256(src: i32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.gather3siv4.di"]
    unsafe fn vpgatherdq_256(src: i64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.gather3siv4.df"]
    unsafe fn vgatherdpd_256(src: f64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x4;
    #[link_name = "llvm.x86.avx512.gather3siv8.sf"]
    unsafe fn vgatherdps_256(src: f32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> f32x8;
    #[link_name = "llvm.x86.avx512.gather3div8.si"]
    unsafe fn vpgatherqd_256(src: i32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i32x4;
    #[link_name = "llvm.x86.avx512.gather3div4.di"]
    unsafe fn vpgatherqq_256(src: i64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.gather3div4.df"]
    unsafe fn vgatherqpd_256(src: f64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f64x4;
    #[link_name = "llvm.x86.avx512.gather3div8.sf"]
    unsafe fn vgatherqps_256(src: f32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
    unsafe fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
    unsafe fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;

    #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
    unsafe fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
    #[link_name = "llvm.x86.avx512.mask.cmp.ps.256"]
    unsafe fn vcmpps256(a: f32x8, b: f32x8, op: i32, m: i8) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.ps.128"]
    unsafe fn vcmpps128(a: f32x4, b: f32x4, op: i32, m: i8) -> i8;

    #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
    unsafe fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.pd.256"]
    unsafe fn vcmppd256(a: f64x4, b: f64x4, op: i32, m: i8) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
    unsafe fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;

    #[link_name = "llvm.x86.avx512.mask.prol.d.512"]
    unsafe fn vprold(a: i32x16, i8: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.prol.d.256"]
    unsafe fn vprold256(a: i32x8, i8: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.prol.d.128"]
    unsafe fn vprold128(a: i32x4, i8: i32) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.pror.d.512"]
    unsafe fn vprord(a: i32x16, i8: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.pror.d.256"]
    unsafe fn vprord256(a: i32x8, i8: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.pror.d.128"]
    unsafe fn vprord128(a: i32x4, i8: i32) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.prol.q.512"]
    unsafe fn vprolq(a: i64x8, i8: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.prol.q.256"]
    unsafe fn vprolq256(a: i64x4, i8: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.prol.q.128"]
    unsafe fn vprolq128(a: i64x2, i8: i32) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.pror.q.512"]
    unsafe fn vprorq(a: i64x8, i8: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.pror.q.256"]
    unsafe fn vprorq256(a: i64x4, i8: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.pror.q.128"]
    unsafe fn vprorq128(a: i64x2, i8: i32) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.prolv.d.512"]
    unsafe fn vprolvd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.prolv.d.256"]
    unsafe fn vprolvd256(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.prolv.d.128"]
    unsafe fn vprolvd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.prorv.d.512"]
    unsafe fn vprorvd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.prorv.d.256"]
    unsafe fn vprorvd256(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.prorv.d.128"]
    unsafe fn vprorvd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.prolv.q.512"]
    unsafe fn vprolvq(a: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.prolv.q.256"]
    unsafe fn vprolvq256(a: i64x4, b: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.prolv.q.128"]
    unsafe fn vprolvq128(a: i64x2, b: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.prorv.q.512"]
    unsafe fn vprorvq(a: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.prorv.q.256"]
    unsafe fn vprorvq256(a: i64x4, b: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.prorv.q.128"]
    unsafe fn vprorvq128(a: i64x2, b: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.psllv.d.512"]
    unsafe fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.psrlv.d.512"]
    unsafe fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.psllv.q.512"]
    unsafe fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.psrlv.q.512"]
    unsafe fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;

    #[link_name = "llvm.x86.avx512.psll.d.512"]
    unsafe fn vpslld(a: i32x16, count: i32x4) -> i32x16;
    #[link_name = "llvm.x86.avx512.psrl.d.512"]
    unsafe fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
    #[link_name = "llvm.x86.avx512.psll.q.512"]
    unsafe fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
    #[link_name = "llvm.x86.avx512.psrl.q.512"]
    unsafe fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;

    #[link_name = "llvm.x86.avx512.psra.d.512"]
    unsafe fn vpsrad(a: i32x16, count: i32x4) -> i32x16;

    #[link_name = "llvm.x86.avx512.psra.q.512"]
    unsafe fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
    #[link_name = "llvm.x86.avx512.psra.q.256"]
    unsafe fn vpsraq256(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx512.psra.q.128"]
    unsafe fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.psrav.d.512"]
    unsafe fn vpsravd(a: i32x16, count: i32x16) -> i32x16;

    #[link_name = "llvm.x86.avx512.psrav.q.512"]
    unsafe fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.psrav.q.256"]
    unsafe fn vpsravq256(a: i64x4, count: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.psrav.q.128"]
    unsafe fn vpsravq128(a: i64x2, count: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
    unsafe fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
    #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
    unsafe fn vpermilpd(a: f64x8, b: i64x8) -> f64x8;

    #[link_name = "llvm.x86.avx512.permvar.si.512"]
    unsafe fn vpermd(a: i32x16, idx: i32x16) -> i32x16;

    #[link_name = "llvm.x86.avx512.permvar.di.512"]
    unsafe fn vpermq(a: i64x8, idx: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.permvar.di.256"]
    unsafe fn vpermq256(a: i64x4, idx: i64x4) -> i64x4;

    #[link_name = "llvm.x86.avx512.permvar.sf.512"]
    unsafe fn vpermps(a: f32x16, idx: i32x16) -> f32x16;

    #[link_name = "llvm.x86.avx512.permvar.df.512"]
    unsafe fn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
    #[link_name = "llvm.x86.avx512.permvar.df.256"]
    unsafe fn vpermpd256(a: f64x4, idx: i64x4) -> f64x4;

    #[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
    unsafe fn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.d.256"]
    unsafe fn vpermi2d256(a: i32x8, idx: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.d.128"]
    unsafe fn vpermi2d128(a: i32x4, idx: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
    unsafe fn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.q.256"]
    unsafe fn vpermi2q256(a: i64x4, idx: i64x4, b: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.vpermi2var.q.128"]
    unsafe fn vpermi2q128(a: i64x2, idx: i64x2, b: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
    unsafe fn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.ps.256"]
    unsafe fn vpermi2ps256(a: f32x8, idx: i32x8, b: f32x8) -> f32x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.ps.128"]
    unsafe fn vpermi2ps128(a: f32x4, idx: i32x4, b: f32x4) -> f32x4;

    #[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
    unsafe fn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.pd.256"]
    unsafe fn vpermi2pd256(a: f64x4, idx: i64x4, b: f64x4) -> f64x4;
    #[link_name = "llvm.x86.avx512.vpermi2var.pd.128"]
    unsafe fn vpermi2pd128(a: f64x2, idx: i64x2, b: f64x2) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.compress.d.512"]
    unsafe fn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.compress.d.256"]
    unsafe fn vpcompressd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.compress.d.128"]
    unsafe fn vpcompressd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.compress.q.512"]
    unsafe fn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.compress.q.256"]
    unsafe fn vpcompressq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.compress.q.128"]
    unsafe fn vpcompressq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.compress.ps.512"]
    unsafe fn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.compress.ps.256"]
    unsafe fn vcompressps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.compress.ps.128"]
    unsafe fn vcompressps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.compress.pd.512"]
    unsafe fn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.compress.pd.256"]
    unsafe fn vcompresspd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.compress.pd.128"]
    unsafe fn vcompresspd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.compress.store.d.512"]
    unsafe fn vcompressstored(mem: *mut i8, data: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.compress.store.d.256"]
    unsafe fn vcompressstored256(mem: *mut i8, data: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.d.128"]
    unsafe fn vcompressstored128(mem: *mut i8, data: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.compress.store.q.512"]
    unsafe fn vcompressstoreq(mem: *mut i8, data: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.q.256"]
    unsafe fn vcompressstoreq256(mem: *mut i8, data: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.q.128"]
    unsafe fn vcompressstoreq128(mem: *mut i8, data: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.512"]
    unsafe fn vcompressstoreps(mem: *mut i8, data: f32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.256"]
    unsafe fn vcompressstoreps256(mem: *mut i8, data: f32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.128"]
    unsafe fn vcompressstoreps128(mem: *mut i8, data: f32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.512"]
    unsafe fn vcompressstorepd(mem: *mut i8, data: f64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.256"]
    unsafe fn vcompressstorepd256(mem: *mut i8, data: f64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.128"]
    unsafe fn vcompressstorepd128(mem: *mut i8, data: f64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.expand.d.512"]
    unsafe fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.d.256"]
    unsafe fn vpexpandd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.expand.d.128"]
    unsafe fn vpexpandd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.expand.q.512"]
    unsafe fn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.expand.q.256"]
    unsafe fn vpexpandq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.expand.q.128"]
    unsafe fn vpexpandq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.expand.ps.512"]
    unsafe fn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.ps.256"]
    unsafe fn vexpandps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.expand.ps.128"]
    unsafe fn vexpandps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.expand.pd.512"]
    unsafe fn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.expand.pd.256"]
    unsafe fn vexpandpd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.expand.pd.128"]
    unsafe fn vexpandpd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.add.ss.round"]
    unsafe fn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.add.sd.round"]
    unsafe fn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.sub.ss.round"]
    unsafe fn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.sub.sd.round"]
    unsafe fn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.mul.ss.round"]
    unsafe fn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.mul.sd.round"]
    unsafe fn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.div.ss.round"]
    unsafe fn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.div.sd.round"]
    unsafe fn vdivsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.max.ss.round"]
    unsafe fn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.max.sd.round"]
    unsafe fn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.min.ss.round"]
    unsafe fn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.min.sd.round"]
    unsafe fn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
    unsafe fn vsqrtss(a: __m128, b: __m128, src: __m128, mask: u8, rounding: i32) -> __m128;
    #[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
    unsafe fn vsqrtsd(a: __m128d, b: __m128d, src: __m128d, mask: u8, rounding: i32) -> __m128d;
    #[link_name = "llvm.x86.avx512.mask.getexp.ss"]
    unsafe fn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.getexp.sd"]
    unsafe fn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.getmant.ss"]
    unsafe fn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.getmant.sd"]
    unsafe fn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2;

    #[link_name = "llvm.x86.avx512.rsqrt14.ss"]
    unsafe fn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
    #[link_name = "llvm.x86.avx512.rsqrt14.sd"]
    unsafe fn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
    #[link_name = "llvm.x86.avx512.rcp14.ss"]
    unsafe fn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
    #[link_name = "llvm.x86.avx512.rcp14.sd"]
    unsafe fn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.rndscale.ss"]
    unsafe fn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.rndscale.sd"]
    unsafe fn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.scalef.ss"]
    unsafe fn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.scalef.sd"]
    unsafe fn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;

    #[link_name = "llvm.x86.avx512.vfmadd.f32"]
    unsafe fn vfmaddssround(a: f32, b: f32, c: f32, rounding: i32) -> f32;
    #[link_name = "llvm.x86.avx512.vfmadd.f64"]
    unsafe fn vfmaddsdround(a: f64, b: f64, c: f64, rounding: i32) -> f64;

    #[link_name = "llvm.x86.avx512.mask.fixupimm.ss"]
    unsafe fn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.sd"]
    unsafe fn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"]
    unsafe fn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"]
    unsafe fn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"]
    unsafe fn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"]
    unsafe fn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.vcvtss2si32"]
    unsafe fn vcvtss2si(a: f32x4, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.vcvtss2usi32"]
    unsafe fn vcvtss2usi(a: f32x4, rounding: i32) -> u32;

    #[link_name = "llvm.x86.avx512.vcvtsd2si32"]
    unsafe fn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
    unsafe fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;

    #[link_name = "llvm.x86.avx512.cvtsi2ss32"]
    unsafe fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.cvtusi2ss"]
    unsafe fn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.cvttss2si"]
    unsafe fn vcvttss2si(a: f32x4, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.cvttss2usi"]
    unsafe fn vcvttss2usi(a: f32x4, rounding: i32) -> u32;

    #[link_name = "llvm.x86.avx512.cvttsd2si"]
    unsafe fn vcvttsd2si(a: f64x2, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.cvttsd2usi"]
    unsafe fn vcvttsd2usi(a: f64x2, rounding: i32) -> u32;

    #[link_name = "llvm.x86.avx512.vcomi.ss"]
    unsafe fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32;
    #[link_name = "llvm.x86.avx512.vcomi.sd"]
    unsafe fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;

    #[link_name = "llvm.x86.avx512.mask.loadu.d.128"]
    unsafe fn loaddqu32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.loadu.q.128"]
    unsafe fn loaddqu64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
    #[link_name = "llvm.x86.avx512.mask.loadu.ps.128"]
    unsafe fn loadups_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.loadu.pd.128"]
    unsafe fn loadupd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.loadu.d.256"]
    unsafe fn loaddqu32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.loadu.q.256"]
    unsafe fn loaddqu64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.loadu.ps.256"]
    unsafe fn loadups_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.loadu.pd.256"]
    unsafe fn loadupd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.loadu.d.512"]
    unsafe fn loaddqu32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.loadu.q.512"]
    unsafe fn loaddqu64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.loadu.ps.512"]
    unsafe fn loadups_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.loadu.pd.512"]
    unsafe fn loadupd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43261
43262 #[link_name = "llvm.x86.avx512.mask.load.d.128"]
43263 unsafefn loaddqa32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
43264 #[link_name = "llvm.x86.avx512.mask.load.q.128"]
43265 unsafefn loaddqa64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
43266 #[link_name = "llvm.x86.avx512.mask.load.ps.128"]
43267 unsafefn loadaps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
43268 #[link_name = "llvm.x86.avx512.mask.load.pd.128"]
43269 unsafefn loadapd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
43270 #[link_name = "llvm.x86.avx512.mask.load.d.256"]
43271 unsafefn loaddqa32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
43272 #[link_name = "llvm.x86.avx512.mask.load.q.256"]
43273 unsafefn loaddqa64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
43274 #[link_name = "llvm.x86.avx512.mask.load.ps.256"]
43275 unsafefn loadaps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
43276 #[link_name = "llvm.x86.avx512.mask.load.pd.256"]
43277 unsafefn loadapd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
43278 #[link_name = "llvm.x86.avx512.mask.load.d.512"]
43279 unsafefn loaddqa32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
43280 #[link_name = "llvm.x86.avx512.mask.load.q.512"]
43281 unsafefn loaddqa64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
43282 #[link_name = "llvm.x86.avx512.mask.load.ps.512"]
43283 unsafefn loadaps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
43284 #[link_name = "llvm.x86.avx512.mask.load.pd.512"]
43285 unsafefn loadapd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43286
    #[link_name = "llvm.x86.avx512.mask.storeu.d.128"]
    unsafe fn storedqu32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.q.128"]
    unsafe fn storedqu64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.ps.128"]
    unsafe fn storeups_128(mem_addr: *mut f32, a: f32x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.pd.128"]
    unsafe fn storeupd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.d.256"]
    unsafe fn storedqu32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.q.256"]
    unsafe fn storedqu64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.ps.256"]
    unsafe fn storeups_256(mem_addr: *mut f32, a: f32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.pd.256"]
    unsafe fn storeupd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.d.512"]
    unsafe fn storedqu32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.storeu.q.512"]
    unsafe fn storedqu64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.ps.512"]
    unsafe fn storeups_512(mem_addr: *mut f32, a: f32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.storeu.pd.512"]
    unsafe fn storeupd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
43311
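    // Masked aligned stores: as above, with an alignment requirement on `mem_addr`.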
    #[link_name = "llvm.x86.avx512.mask.store.d.128"]
    unsafe fn storedqa32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.q.128"]
    unsafe fn storedqa64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.ps.128"]
    unsafe fn storeaps_128(mem_addr: *mut f32, a: f32x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.pd.128"]
    unsafe fn storeapd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.d.256"]
    unsafe fn storedqa32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.q.256"]
    unsafe fn storedqa64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.ps.256"]
    unsafe fn storeaps_256(mem_addr: *mut f32, a: f32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.pd.256"]
    unsafe fn storeapd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.d.512"]
    unsafe fn storedqa32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.store.q.512"]
    unsafe fn storedqa64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.ps.512"]
    unsafe fn storeaps_512(mem_addr: *mut f32, a: f32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.store.pd.512"]
    unsafe fn storeapd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
43336
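    // Masked expand loads: consecutive elements are read from memory and placed into the lanes
    // selected by the mask; unselected lanes keep the corresponding value from `a`.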
    #[link_name = "llvm.x86.avx512.mask.expand.load.d.128"]
    unsafe fn expandloadd_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.expand.load.q.128"]
    unsafe fn expandloadq_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.128"]
    unsafe fn expandloadps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.128"]
    unsafe fn expandloadpd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.expand.load.d.256"]
    unsafe fn expandloadd_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.expand.load.q.256"]
    unsafe fn expandloadq_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.256"]
    unsafe fn expandloadps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.256"]
    unsafe fn expandloadpd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.expand.load.d.512"]
    unsafe fn expandloadd_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.load.q.512"]
    unsafe fn expandloadq_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.512"]
    unsafe fn expandloadps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.512"]
    unsafe fn expandloadpd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43361
43362}
43363
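// The tests below exercise the plain, writemask (`_mask_`), and zeromask (`_maskz_`) forms of each intrinsic.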
43364#[cfg(test)]
43365mod tests {
43366
43367 use stdarch_test::simd_test;
43368
43369 use crate::core_arch::x86::*;
43370 use crate::hint::black_box;
43371 use crate::mem::{self};
43372
43373 #[simd_test(enable = "avx512f")]
43374 unsafe fn test_mm512_abs_epi32() {
43375 #[rustfmt::skip]
43376 let a = _mm512_setr_epi32(
43377 0, 1, -1, i32::MAX,
43378 i32::MIN, 100, -100, -32,
43379 0, 1, -1, i32::MAX,
43380 i32::MIN, 100, -100, -32,
43381 );
43382 let r = _mm512_abs_epi32(a);
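        // The absolute value of i32::MIN wraps back to i32::MIN, written below as i32::MAX.wrapping_add(1).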
43383 #[rustfmt::skip]
43384 let e = _mm512_setr_epi32(
43385 0, 1, 1, i32::MAX,
43386 i32::MAX.wrapping_add(1), 100, 100, 32,
43387 0, 1, 1, i32::MAX,
43388 i32::MAX.wrapping_add(1), 100, 100, 32,
43389 );
43390 assert_eq_m512i(r, e);
43391 }
43392
43393 #[simd_test(enable = "avx512f")]
43394 unsafe fn test_mm512_mask_abs_epi32() {
43395 #[rustfmt::skip]
43396 let a = _mm512_setr_epi32(
43397 0, 1, -1, i32::MAX,
43398 i32::MIN, 100, -100, -32,
43399 0, 1, -1, i32::MAX,
43400 i32::MIN, 100, -100, -32,
43401 );
43402 let r = _mm512_mask_abs_epi32(a, 0, a);
43403 assert_eq_m512i(r, a);
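        // The low mask bits correspond to the first elements in `_mm512_setr_epi32` order, so
        // 0b00000000_11111111 updates lanes 0..8 and copies lanes 8..16 from `src`.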
43404 let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
43405 #[rustfmt::skip]
43406 let e = _mm512_setr_epi32(
43407 0, 1, 1, i32::MAX,
43408 i32::MAX.wrapping_add(1), 100, 100, 32,
43409 0, 1, -1, i32::MAX,
43410 i32::MIN, 100, -100, -32,
43411 );
43412 assert_eq_m512i(r, e);
43413 }
43414
43415 #[simd_test(enable = "avx512f")]
43416 unsafe fn test_mm512_maskz_abs_epi32() {
43417 #[rustfmt::skip]
43418 let a = _mm512_setr_epi32(
43419 0, 1, -1, i32::MAX,
43420 i32::MIN, 100, -100, -32,
43421 0, 1, -1, i32::MAX,
43422 i32::MIN, 100, -100, -32,
43423 );
43424 let r = _mm512_maskz_abs_epi32(0, a);
43425 assert_eq_m512i(r, _mm512_setzero_si512());
43426 let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
43427 #[rustfmt::skip]
43428 let e = _mm512_setr_epi32(
43429 0, 1, 1, i32::MAX,
43430 i32::MAX.wrapping_add(1), 100, 100, 32,
43431 0, 0, 0, 0,
43432 0, 0, 0, 0,
43433 );
43434 assert_eq_m512i(r, e);
43435 }
43436
43437 #[simd_test(enable = "avx512f,avx512vl")]
43438 unsafe fn test_mm256_mask_abs_epi32() {
43439 #[rustfmt::skip]
43440 let a = _mm256_setr_epi32(
43441 0, 1, -1, i32::MAX,
43442 i32::MIN, 100, -100, -32,
43443 );
43444 let r = _mm256_mask_abs_epi32(a, 0, a);
43445 assert_eq_m256i(r, a);
43446 let r = _mm256_mask_abs_epi32(a, 0b00001111, a);
43447 #[rustfmt::skip]
43448 let e = _mm256_setr_epi32(
43449 0, 1, 1, i32::MAX,
43450 i32::MAX.wrapping_add(1), 100, -100, -32,
43451 );
43452 assert_eq_m256i(r, e);
43453 }
43454
43455 #[simd_test(enable = "avx512f,avx512vl")]
43456 unsafe fn test_mm256_maskz_abs_epi32() {
43457 #[rustfmt::skip]
43458 let a = _mm256_setr_epi32(
43459 0, 1, -1, i32::MAX,
43460 i32::MIN, 100, -100, -32,
43461 );
43462 let r = _mm256_maskz_abs_epi32(0, a);
43463 assert_eq_m256i(r, _mm256_setzero_si256());
43464 let r = _mm256_maskz_abs_epi32(0b00001111, a);
43465 #[rustfmt::skip]
43466 let e = _mm256_setr_epi32(
43467 0, 1, 1, i32::MAX,
43468 0, 0, 0, 0,
43469 );
43470 assert_eq_m256i(r, e);
43471 }
43472
43473 #[simd_test(enable = "avx512f,avx512vl")]
43474 unsafe fn test_mm_mask_abs_epi32() {
43475 let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
43476 let r = _mm_mask_abs_epi32(a, 0, a);
43477 assert_eq_m128i(r, a);
43478 let r = _mm_mask_abs_epi32(a, 0b00001111, a);
43479 let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
43480 assert_eq_m128i(r, e);
43481 }
43482
43483 #[simd_test(enable = "avx512f,avx512vl")]
43484 unsafe fn test_mm_maskz_abs_epi32() {
43485 let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
43486 let r = _mm_maskz_abs_epi32(0, a);
43487 assert_eq_m128i(r, _mm_setzero_si128());
43488 let r = _mm_maskz_abs_epi32(0b00001111, a);
43489 let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
43490 assert_eq_m128i(r, e);
43491 }
43492
43493 #[simd_test(enable = "avx512f")]
43494 unsafe fn test_mm512_abs_ps() {
43495 #[rustfmt::skip]
43496 let a = _mm512_setr_ps(
43497 0., 1., -1., f32::MAX,
43498 f32::MIN, 100., -100., -32.,
43499 0., 1., -1., f32::MAX,
43500 f32::MIN, 100., -100., -32.,
43501 );
43502 let r = _mm512_abs_ps(a);
43503 #[rustfmt::skip]
43504 let e = _mm512_setr_ps(
43505 0., 1., 1., f32::MAX,
43506 f32::MAX, 100., 100., 32.,
43507 0., 1., 1., f32::MAX,
43508 f32::MAX, 100., 100., 32.,
43509 );
43510 assert_eq_m512(r, e);
43511 }
43512
43513 #[simd_test(enable = "avx512f")]
43514 unsafe fn test_mm512_mask_abs_ps() {
43515 #[rustfmt::skip]
43516 let a = _mm512_setr_ps(
43517 0., 1., -1., f32::MAX,
43518 f32::MIN, 100., -100., -32.,
43519 0., 1., -1., f32::MAX,
43520 f32::MIN, 100., -100., -32.,
43521 );
43522 let r = _mm512_mask_abs_ps(a, 0, a);
43523 assert_eq_m512(r, a);
43524 let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
43525 #[rustfmt::skip]
43526 let e = _mm512_setr_ps(
43527 0., 1., 1., f32::MAX,
43528 f32::MAX, 100., 100., 32.,
43529 0., 1., -1., f32::MAX,
43530 f32::MIN, 100., -100., -32.,
43531 );
43532 assert_eq_m512(r, e);
43533 }
43534
43535 #[simd_test(enable = "avx512f")]
43536 unsafe fn test_mm512_mask_mov_epi32() {
43537 let src = _mm512_set1_epi32(1);
43538 let a = _mm512_set1_epi32(2);
43539 let r = _mm512_mask_mov_epi32(src, 0, a);
43540 assert_eq_m512i(r, src);
43541 let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a);
43542 assert_eq_m512i(r, a);
43543 }
43544
43545 #[simd_test(enable = "avx512f")]
43546 unsafe fn test_mm512_maskz_mov_epi32() {
43547 let a = _mm512_set1_epi32(2);
43548 let r = _mm512_maskz_mov_epi32(0, a);
43549 assert_eq_m512i(r, _mm512_setzero_si512());
43550 let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a);
43551 assert_eq_m512i(r, a);
43552 }
43553
43554 #[simd_test(enable = "avx512f,avx512vl")]
43555 unsafe fn test_mm256_mask_mov_epi32() {
43556 let src = _mm256_set1_epi32(1);
43557 let a = _mm256_set1_epi32(2);
43558 let r = _mm256_mask_mov_epi32(src, 0, a);
43559 assert_eq_m256i(r, src);
43560 let r = _mm256_mask_mov_epi32(src, 0b11111111, a);
43561 assert_eq_m256i(r, a);
43562 }
43563
43564 #[simd_test(enable = "avx512f,avx512vl")]
43565 unsafe fn test_mm256_maskz_mov_epi32() {
43566 let a = _mm256_set1_epi32(2);
43567 let r = _mm256_maskz_mov_epi32(0, a);
43568 assert_eq_m256i(r, _mm256_setzero_si256());
43569 let r = _mm256_maskz_mov_epi32(0b11111111, a);
43570 assert_eq_m256i(r, a);
43571 }
43572
43573 #[simd_test(enable = "avx512f,avx512vl")]
43574 unsafe fn test_mm_mask_mov_epi32() {
43575 let src = _mm_set1_epi32(1);
43576 let a = _mm_set1_epi32(2);
43577 let r = _mm_mask_mov_epi32(src, 0, a);
43578 assert_eq_m128i(r, src);
43579 let r = _mm_mask_mov_epi32(src, 0b00001111, a);
43580 assert_eq_m128i(r, a);
43581 }
43582
43583 #[simd_test(enable = "avx512f,avx512vl")]
43584 unsafe fn test_mm_maskz_mov_epi32() {
43585 let a = _mm_set1_epi32(2);
43586 let r = _mm_maskz_mov_epi32(0, a);
43587 assert_eq_m128i(r, _mm_setzero_si128());
43588 let r = _mm_maskz_mov_epi32(0b00001111, a);
43589 assert_eq_m128i(r, a);
43590 }
43591
43592 #[simd_test(enable = "avx512f")]
43593 unsafe fn test_mm512_mask_mov_ps() {
43594 let src = _mm512_set1_ps(1.);
43595 let a = _mm512_set1_ps(2.);
43596 let r = _mm512_mask_mov_ps(src, 0, a);
43597 assert_eq_m512(r, src);
43598 let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a);
43599 assert_eq_m512(r, a);
43600 }
43601
43602 #[simd_test(enable = "avx512f")]
43603 unsafe fn test_mm512_maskz_mov_ps() {
43604 let a = _mm512_set1_ps(2.);
43605 let r = _mm512_maskz_mov_ps(0, a);
43606 assert_eq_m512(r, _mm512_setzero_ps());
43607 let r = _mm512_maskz_mov_ps(0b11111111_11111111, a);
43608 assert_eq_m512(r, a);
43609 }
43610
43611 #[simd_test(enable = "avx512f,avx512vl")]
43612 unsafe fn test_mm256_mask_mov_ps() {
43613 let src = _mm256_set1_ps(1.);
43614 let a = _mm256_set1_ps(2.);
43615 let r = _mm256_mask_mov_ps(src, 0, a);
43616 assert_eq_m256(r, src);
43617 let r = _mm256_mask_mov_ps(src, 0b11111111, a);
43618 assert_eq_m256(r, a);
43619 }
43620
43621 #[simd_test(enable = "avx512f,avx512vl")]
43622 unsafe fn test_mm256_maskz_mov_ps() {
43623 let a = _mm256_set1_ps(2.);
43624 let r = _mm256_maskz_mov_ps(0, a);
43625 assert_eq_m256(r, _mm256_setzero_ps());
43626 let r = _mm256_maskz_mov_ps(0b11111111, a);
43627 assert_eq_m256(r, a);
43628 }
43629
43630 #[simd_test(enable = "avx512f,avx512vl")]
43631 unsafe fn test_mm_mask_mov_ps() {
43632 let src = _mm_set1_ps(1.);
43633 let a = _mm_set1_ps(2.);
43634 let r = _mm_mask_mov_ps(src, 0, a);
43635 assert_eq_m128(r, src);
43636 let r = _mm_mask_mov_ps(src, 0b00001111, a);
43637 assert_eq_m128(r, a);
43638 }
43639
43640 #[simd_test(enable = "avx512f,avx512vl")]
43641 unsafe fn test_mm_maskz_mov_ps() {
43642 let a = _mm_set1_ps(2.);
43643 let r = _mm_maskz_mov_ps(0, a);
43644 assert_eq_m128(r, _mm_setzero_ps());
43645 let r = _mm_maskz_mov_ps(0b00001111, a);
43646 assert_eq_m128(r, a);
43647 }
43648
43649 #[simd_test(enable = "avx512f")]
43650 unsafe fn test_mm512_add_epi32() {
43651 #[rustfmt::skip]
43652 let a = _mm512_setr_epi32(
43653 0, 1, -1, i32::MAX,
43654 i32::MIN, 100, -100, -32,
43655 0, 1, -1, i32::MAX,
43656 i32::MIN, 100, -100, -32,
43657 );
43658 let b = _mm512_set1_epi32(1);
43659 let r = _mm512_add_epi32(a, b);
43660 #[rustfmt::skip]
43661 let e = _mm512_setr_epi32(
43662 1, 2, 0, i32::MIN,
43663 i32::MIN + 1, 101, -99, -31,
43664 1, 2, 0, i32::MIN,
43665 i32::MIN + 1, 101, -99, -31,
43666 );
43667 assert_eq_m512i(r, e);
43668 }
43669
43670 #[simd_test(enable = "avx512f")]
43671 unsafe fn test_mm512_mask_add_epi32() {
43672 #[rustfmt::skip]
43673 let a = _mm512_setr_epi32(
43674 0, 1, -1, i32::MAX,
43675 i32::MIN, 100, -100, -32,
43676 0, 1, -1, i32::MAX,
43677 i32::MIN, 100, -100, -32,
43678 );
43679 let b = _mm512_set1_epi32(1);
43680 let r = _mm512_mask_add_epi32(a, 0, a, b);
43681 assert_eq_m512i(r, a);
43682 let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
43683 #[rustfmt::skip]
43684 let e = _mm512_setr_epi32(
43685 1, 2, 0, i32::MIN,
43686 i32::MIN + 1, 101, -99, -31,
43687 0, 1, -1, i32::MAX,
43688 i32::MIN, 100, -100, -32,
43689 );
43690 assert_eq_m512i(r, e);
43691 }
43692
43693 #[simd_test(enable = "avx512f")]
43694 unsafe fn test_mm512_maskz_add_epi32() {
43695 #[rustfmt::skip]
43696 let a = _mm512_setr_epi32(
43697 0, 1, -1, i32::MAX,
43698 i32::MIN, 100, -100, -32,
43699 0, 1, -1, i32::MAX,
43700 i32::MIN, 100, -100, -32,
43701 );
43702 let b = _mm512_set1_epi32(1);
43703 let r = _mm512_maskz_add_epi32(0, a, b);
43704 assert_eq_m512i(r, _mm512_setzero_si512());
43705 let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
43706 #[rustfmt::skip]
43707 let e = _mm512_setr_epi32(
43708 1, 2, 0, i32::MIN,
43709 i32::MIN + 1, 101, -99, -31,
43710 0, 0, 0, 0,
43711 0, 0, 0, 0,
43712 );
43713 assert_eq_m512i(r, e);
43714 }
43715
43716 #[simd_test(enable = "avx512f,avx512vl")]
43717 unsafe fn test_mm256_mask_add_epi32() {
43718 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43719 let b = _mm256_set1_epi32(1);
43720 let r = _mm256_mask_add_epi32(a, 0, a, b);
43721 assert_eq_m256i(r, a);
43722 let r = _mm256_mask_add_epi32(a, 0b11111111, a, b);
43723 let e = _mm256_set_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
43724 assert_eq_m256i(r, e);
43725 }
43726
43727 #[simd_test(enable = "avx512f,avx512vl")]
43728 unsafe fn test_mm256_maskz_add_epi32() {
43729 let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43730 let b = _mm256_set1_epi32(1);
43731 let r = _mm256_maskz_add_epi32(0, a, b);
43732 assert_eq_m256i(r, _mm256_setzero_si256());
43733 let r = _mm256_maskz_add_epi32(0b11111111, a, b);
43734 let e = _mm256_setr_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
43735 assert_eq_m256i(r, e);
43736 }
43737
43738 #[simd_test(enable = "avx512f,avx512vl")]
43739 unsafe fn test_mm_mask_add_epi32() {
43740 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43741 let b = _mm_set1_epi32(1);
43742 let r = _mm_mask_add_epi32(a, 0, a, b);
43743 assert_eq_m128i(r, a);
43744 let r = _mm_mask_add_epi32(a, 0b00001111, a, b);
43745 let e = _mm_set_epi32(2, 0, i32::MIN, i32::MIN + 1);
43746 assert_eq_m128i(r, e);
43747 }
43748
43749 #[simd_test(enable = "avx512f,avx512vl")]
43750 unsafe fn test_mm_maskz_add_epi32() {
43751 let a = _mm_setr_epi32(1, -1, i32::MAX, i32::MIN);
43752 let b = _mm_set1_epi32(1);
43753 let r = _mm_maskz_add_epi32(0, a, b);
43754 assert_eq_m128i(r, _mm_setzero_si128());
43755 let r = _mm_maskz_add_epi32(0b00001111, a, b);
43756 let e = _mm_setr_epi32(2, 0, i32::MIN, i32::MIN + 1);
43757 assert_eq_m128i(r, e);
43758 }
43759
43760 #[simd_test(enable = "avx512f")]
43761 unsafe fn test_mm512_add_ps() {
43762 #[rustfmt::skip]
43763 let a = _mm512_setr_ps(
43764 0., 1., -1., f32::MAX,
43765 f32::MIN, 100., -100., -32.,
43766 0., 1., -1., f32::MAX,
43767 f32::MIN, 100., -100., -32.,
43768 );
43769 let b = _mm512_set1_ps(1.);
43770 let r = _mm512_add_ps(a, b);
43771 #[rustfmt::skip]
43772 let e = _mm512_setr_ps(
43773 1., 2., 0., f32::MAX,
43774 f32::MIN + 1., 101., -99., -31.,
43775 1., 2., 0., f32::MAX,
43776 f32::MIN + 1., 101., -99., -31.,
43777 );
43778 assert_eq_m512(r, e);
43779 }
43780
43781 #[simd_test(enable = "avx512f")]
43782 unsafe fn test_mm512_mask_add_ps() {
43783 #[rustfmt::skip]
43784 let a = _mm512_setr_ps(
43785 0., 1., -1., f32::MAX,
43786 f32::MIN, 100., -100., -32.,
43787 0., 1., -1., f32::MAX,
43788 f32::MIN, 100., -100., -32.,
43789 );
43790 let b = _mm512_set1_ps(1.);
43791 let r = _mm512_mask_add_ps(a, 0, a, b);
43792 assert_eq_m512(r, a);
43793 let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
43794 #[rustfmt::skip]
43795 let e = _mm512_setr_ps(
43796 1., 2., 0., f32::MAX,
43797 f32::MIN + 1., 101., -99., -31.,
43798 0., 1., -1., f32::MAX,
43799 f32::MIN, 100., -100., -32.,
43800 );
43801 assert_eq_m512(r, e);
43802 }
43803
43804 #[simd_test(enable = "avx512f")]
43805 unsafe fn test_mm512_maskz_add_ps() {
43806 #[rustfmt::skip]
43807 let a = _mm512_setr_ps(
43808 0., 1., -1., f32::MAX,
43809 f32::MIN, 100., -100., -32.,
43810 0., 1., -1., f32::MAX,
43811 f32::MIN, 100., -100., -32.,
43812 );
43813 let b = _mm512_set1_ps(1.);
43814 let r = _mm512_maskz_add_ps(0, a, b);
43815 assert_eq_m512(r, _mm512_setzero_ps());
43816 let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
43817 #[rustfmt::skip]
43818 let e = _mm512_setr_ps(
43819 1., 2., 0., f32::MAX,
43820 f32::MIN + 1., 101., -99., -31.,
43821 0., 0., 0., 0.,
43822 0., 0., 0., 0.,
43823 );
43824 assert_eq_m512(r, e);
43825 }
43826
43827 #[simd_test(enable = "avx512f,avx512vl")]
43828 unsafe fn test_mm256_mask_add_ps() {
43829 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43830 let b = _mm256_set1_ps(1.);
43831 let r = _mm256_mask_add_ps(a, 0, a, b);
43832 assert_eq_m256(r, a);
43833 let r = _mm256_mask_add_ps(a, 0b11111111, a, b);
43834 let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
43835 assert_eq_m256(r, e);
43836 }
43837
43838 #[simd_test(enable = "avx512f,avx512vl")]
43839 unsafe fn test_mm256_maskz_add_ps() {
43840 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43841 let b = _mm256_set1_ps(1.);
43842 let r = _mm256_maskz_add_ps(0, a, b);
43843 assert_eq_m256(r, _mm256_setzero_ps());
43844 let r = _mm256_maskz_add_ps(0b11111111, a, b);
43845 let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
43846 assert_eq_m256(r, e);
43847 }
43848
43849 #[simd_test(enable = "avx512f,avx512vl")]
43850 unsafe fn test_mm_mask_add_ps() {
43851 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43852 let b = _mm_set1_ps(1.);
43853 let r = _mm_mask_add_ps(a, 0, a, b);
43854 assert_eq_m128(r, a);
43855 let r = _mm_mask_add_ps(a, 0b00001111, a, b);
43856 let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
43857 assert_eq_m128(r, e);
43858 }
43859
43860 #[simd_test(enable = "avx512f,avx512vl")]
43861 unsafe fn test_mm_maskz_add_ps() {
43862 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43863 let b = _mm_set1_ps(1.);
43864 let r = _mm_maskz_add_ps(0, a, b);
43865 assert_eq_m128(r, _mm_setzero_ps());
43866 let r = _mm_maskz_add_ps(0b00001111, a, b);
43867 let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
43868 assert_eq_m128(r, e);
43869 }
43870
43871 #[simd_test(enable = "avx512f")]
43872 unsafe fn test_mm512_sub_epi32() {
43873 #[rustfmt::skip]
43874 let a = _mm512_setr_epi32(
43875 0, 1, -1, i32::MAX,
43876 i32::MIN, 100, -100, -32,
43877 0, 1, -1, i32::MAX,
43878 i32::MIN, 100, -100, -32,
43879 );
43880 let b = _mm512_set1_epi32(1);
43881 let r = _mm512_sub_epi32(a, b);
43882 #[rustfmt::skip]
43883 let e = _mm512_setr_epi32(
43884 -1, 0, -2, i32::MAX - 1,
43885 i32::MAX, 99, -101, -33,
43886 -1, 0, -2, i32::MAX - 1,
43887 i32::MAX, 99, -101, -33,
43888 );
43889 assert_eq_m512i(r, e);
43890 }
43891
43892 #[simd_test(enable = "avx512f")]
43893 unsafe fn test_mm512_mask_sub_epi32() {
43894 #[rustfmt::skip]
43895 let a = _mm512_setr_epi32(
43896 0, 1, -1, i32::MAX,
43897 i32::MIN, 100, -100, -32,
43898 0, 1, -1, i32::MAX,
43899 i32::MIN, 100, -100, -32,
43900 );
43901 let b = _mm512_set1_epi32(1);
43902 let r = _mm512_mask_sub_epi32(a, 0, a, b);
43903 assert_eq_m512i(r, a);
43904 let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
43905 #[rustfmt::skip]
43906 let e = _mm512_setr_epi32(
43907 -1, 0, -2, i32::MAX - 1,
43908 i32::MAX, 99, -101, -33,
43909 0, 1, -1, i32::MAX,
43910 i32::MIN, 100, -100, -32,
43911 );
43912 assert_eq_m512i(r, e);
43913 }
43914
43915 #[simd_test(enable = "avx512f")]
43916 unsafe fn test_mm512_maskz_sub_epi32() {
43917 #[rustfmt::skip]
43918 let a = _mm512_setr_epi32(
43919 0, 1, -1, i32::MAX,
43920 i32::MIN, 100, -100, -32,
43921 0, 1, -1, i32::MAX,
43922 i32::MIN, 100, -100, -32,
43923 );
43924 let b = _mm512_set1_epi32(1);
43925 let r = _mm512_maskz_sub_epi32(0, a, b);
43926 assert_eq_m512i(r, _mm512_setzero_si512());
43927 let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
43928 #[rustfmt::skip]
43929 let e = _mm512_setr_epi32(
43930 -1, 0, -2, i32::MAX - 1,
43931 i32::MAX, 99, -101, -33,
43932 0, 0, 0, 0,
43933 0, 0, 0, 0,
43934 );
43935 assert_eq_m512i(r, e);
43936 }
43937
43938 #[simd_test(enable = "avx512f,avx512vl")]
43939 unsafe fn test_mm256_mask_sub_epi32() {
43940 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43941 let b = _mm256_set1_epi32(1);
43942 let r = _mm256_mask_sub_epi32(a, 0, a, b);
43943 assert_eq_m256i(r, a);
43944 let r = _mm256_mask_sub_epi32(a, 0b11111111, a, b);
43945 let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
43946 assert_eq_m256i(r, e);
43947 }
43948
43949 #[simd_test(enable = "avx512f,avx512vl")]
43950 unsafe fn test_mm256_maskz_sub_epi32() {
43951 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43952 let b = _mm256_set1_epi32(1);
43953 let r = _mm256_maskz_sub_epi32(0, a, b);
43954 assert_eq_m256i(r, _mm256_setzero_si256());
43955 let r = _mm256_maskz_sub_epi32(0b11111111, a, b);
43956 let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
43957 assert_eq_m256i(r, e);
43958 }
43959
43960 #[simd_test(enable = "avx512f,avx512vl")]
43961 unsafe fn test_mm_mask_sub_epi32() {
43962 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43963 let b = _mm_set1_epi32(1);
43964 let r = _mm_mask_sub_epi32(a, 0, a, b);
43965 assert_eq_m128i(r, a);
43966 let r = _mm_mask_sub_epi32(a, 0b00001111, a, b);
43967 let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
43968 assert_eq_m128i(r, e);
43969 }
43970
43971 #[simd_test(enable = "avx512f,avx512vl")]
43972 unsafe fn test_mm_maskz_sub_epi32() {
43973 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43974 let b = _mm_set1_epi32(1);
43975 let r = _mm_maskz_sub_epi32(0, a, b);
43976 assert_eq_m128i(r, _mm_setzero_si128());
43977 let r = _mm_maskz_sub_epi32(0b00001111, a, b);
43978 let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
43979 assert_eq_m128i(r, e);
43980 }
43981
43982 #[simd_test(enable = "avx512f")]
43983 unsafe fn test_mm512_sub_ps() {
43984 #[rustfmt::skip]
43985 let a = _mm512_setr_ps(
43986 0., 1., -1., f32::MAX,
43987 f32::MIN, 100., -100., -32.,
43988 0., 1., -1., f32::MAX,
43989 f32::MIN, 100., -100., -32.,
43990 );
43991 let b = _mm512_set1_ps(1.);
43992 let r = _mm512_sub_ps(a, b);
43993 #[rustfmt::skip]
43994 let e = _mm512_setr_ps(
43995 -1., 0., -2., f32::MAX - 1.,
43996 f32::MIN, 99., -101., -33.,
43997 -1., 0., -2., f32::MAX - 1.,
43998 f32::MIN, 99., -101., -33.,
43999 );
44000 assert_eq_m512(r, e);
44001 }
44002
44003 #[simd_test(enable = "avx512f")]
44004 unsafe fn test_mm512_mask_sub_ps() {
44005 #[rustfmt::skip]
44006 let a = _mm512_setr_ps(
44007 0., 1., -1., f32::MAX,
44008 f32::MIN, 100., -100., -32.,
44009 0., 1., -1., f32::MAX,
44010 f32::MIN, 100., -100., -32.,
44011 );
44012 let b = _mm512_set1_ps(1.);
44013 let r = _mm512_mask_sub_ps(a, 0, a, b);
44014 assert_eq_m512(r, a);
44015 let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
44016 #[rustfmt::skip]
44017 let e = _mm512_setr_ps(
44018 -1., 0., -2., f32::MAX - 1.,
44019 f32::MIN, 99., -101., -33.,
44020 0., 1., -1., f32::MAX,
44021 f32::MIN, 100., -100., -32.,
44022 );
44023 assert_eq_m512(r, e);
44024 }
44025
44026 #[simd_test(enable = "avx512f")]
44027 unsafe fn test_mm512_maskz_sub_ps() {
44028 #[rustfmt::skip]
44029 let a = _mm512_setr_ps(
44030 0., 1., -1., f32::MAX,
44031 f32::MIN, 100., -100., -32.,
44032 0., 1., -1., f32::MAX,
44033 f32::MIN, 100., -100., -32.,
44034 );
44035 let b = _mm512_set1_ps(1.);
44036 let r = _mm512_maskz_sub_ps(0, a, b);
44037 assert_eq_m512(r, _mm512_setzero_ps());
44038 let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
44039 #[rustfmt::skip]
44040 let e = _mm512_setr_ps(
44041 -1., 0., -2., f32::MAX - 1.,
44042 f32::MIN, 99., -101., -33.,
44043 0., 0., 0., 0.,
44044 0., 0., 0., 0.,
44045 );
44046 assert_eq_m512(r, e);
44047 }
44048
44049 #[simd_test(enable = "avx512f,avx512vl")]
44050 unsafe fn test_mm256_mask_sub_ps() {
44051 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44052 let b = _mm256_set1_ps(1.);
44053 let r = _mm256_mask_sub_ps(a, 0, a, b);
44054 assert_eq_m256(r, a);
44055 let r = _mm256_mask_sub_ps(a, 0b11111111, a, b);
44056 let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
44057 assert_eq_m256(r, e);
44058 }
44059
44060 #[simd_test(enable = "avx512f,avx512vl")]
44061 unsafe fn test_mm256_maskz_sub_ps() {
44062 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44063 let b = _mm256_set1_ps(1.);
44064 let r = _mm256_maskz_sub_ps(0, a, b);
44065 assert_eq_m256(r, _mm256_setzero_ps());
44066 let r = _mm256_maskz_sub_ps(0b11111111, a, b);
44067 let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
44068 assert_eq_m256(r, e);
44069 }
44070
44071 #[simd_test(enable = "avx512f,avx512vl")]
44072 unsafe fn test_mm_mask_sub_ps() {
44073 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44074 let b = _mm_set1_ps(1.);
44075 let r = _mm_mask_sub_ps(a, 0, a, b);
44076 assert_eq_m128(r, a);
44077 let r = _mm_mask_sub_ps(a, 0b00001111, a, b);
44078 let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
44079 assert_eq_m128(r, e);
44080 }
44081
44082 #[simd_test(enable = "avx512f,avx512vl")]
44083 unsafe fn test_mm_maskz_sub_ps() {
44084 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44085 let b = _mm_set1_ps(1.);
44086 let r = _mm_maskz_sub_ps(0, a, b);
44087 assert_eq_m128(r, _mm_setzero_ps());
44088 let r = _mm_maskz_sub_ps(0b00001111, a, b);
44089 let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
44090 assert_eq_m128(r, e);
44091 }
44092
44093 #[simd_test(enable = "avx512f")]
44094 unsafe fn test_mm512_mullo_epi32() {
44095 #[rustfmt::skip]
44096 let a = _mm512_setr_epi32(
44097 0, 1, -1, i32::MAX,
44098 i32::MIN, 100, -100, -32,
44099 0, 1, -1, i32::MAX,
44100 i32::MIN, 100, -100, -32,
44101 );
44102 let b = _mm512_set1_epi32(2);
44103 let r = _mm512_mullo_epi32(a, b);
44104 let e = _mm512_setr_epi32(
44105 0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64,
44106 );
44107 assert_eq_m512i(r, e);
44108 }
44109
44110 #[simd_test(enable = "avx512f")]
44111 unsafe fn test_mm512_mask_mullo_epi32() {
44112 #[rustfmt::skip]
44113 let a = _mm512_setr_epi32(
44114 0, 1, -1, i32::MAX,
44115 i32::MIN, 100, -100, -32,
44116 0, 1, -1, i32::MAX,
44117 i32::MIN, 100, -100, -32,
44118 );
44119 let b = _mm512_set1_epi32(2);
44120 let r = _mm512_mask_mullo_epi32(a, 0, a, b);
44121 assert_eq_m512i(r, a);
44122 let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
44123 #[rustfmt::skip]
44124 let e = _mm512_setr_epi32(
44125 0, 2, -2, -2,
44126 0, 200, -200, -64,
44127 0, 1, -1, i32::MAX,
44128 i32::MIN, 100, -100, -32,
44129 );
44130 assert_eq_m512i(r, e);
44131 }
44132
44133 #[simd_test(enable = "avx512f")]
44134 unsafe fn test_mm512_maskz_mullo_epi32() {
44135 #[rustfmt::skip]
44136 let a = _mm512_setr_epi32(
44137 0, 1, -1, i32::MAX,
44138 i32::MIN, 100, -100, -32,
44139 0, 1, -1, i32::MAX,
44140 i32::MIN, 100, -100, -32,
44141 );
44142 let b = _mm512_set1_epi32(2);
44143 let r = _mm512_maskz_mullo_epi32(0, a, b);
44144 assert_eq_m512i(r, _mm512_setzero_si512());
44145 let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b);
44146 let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0);
44147 assert_eq_m512i(r, e);
44148 }
44149
44150 #[simd_test(enable = "avx512f,avx512vl")]
44151 unsafe fn test_mm256_mask_mullo_epi32() {
44152 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
44153 let b = _mm256_set1_epi32(2);
44154 let r = _mm256_mask_mullo_epi32(a, 0, a, b);
44155 assert_eq_m256i(r, a);
44156 let r = _mm256_mask_mullo_epi32(a, 0b11111111, a, b);
44157 let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
44158 assert_eq_m256i(r, e);
44159 }
44160
44161 #[simd_test(enable = "avx512f,avx512vl")]
44162 unsafe fn test_mm256_maskz_mullo_epi32() {
44163 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
44164 let b = _mm256_set1_epi32(2);
44165 let r = _mm256_maskz_mullo_epi32(0, a, b);
44166 assert_eq_m256i(r, _mm256_setzero_si256());
44167 let r = _mm256_maskz_mullo_epi32(0b11111111, a, b);
44168 let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
44169 assert_eq_m256i(r, e);
44170 }
44171
44172 #[simd_test(enable = "avx512f,avx512vl")]
44173 unsafe fn test_mm_mask_mullo_epi32() {
44174 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
44175 let b = _mm_set1_epi32(2);
44176 let r = _mm_mask_mullo_epi32(a, 0, a, b);
44177 assert_eq_m128i(r, a);
44178 let r = _mm_mask_mullo_epi32(a, 0b00001111, a, b);
44179 let e = _mm_set_epi32(2, -2, -2, 0);
44180 assert_eq_m128i(r, e);
44181 }
44182
44183 #[simd_test(enable = "avx512f,avx512vl")]
44184 unsafe fn test_mm_maskz_mullo_epi32() {
44185 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
44186 let b = _mm_set1_epi32(2);
44187 let r = _mm_maskz_mullo_epi32(0, a, b);
44188 assert_eq_m128i(r, _mm_setzero_si128());
44189 let r = _mm_maskz_mullo_epi32(0b00001111, a, b);
44190 let e = _mm_set_epi32(2, -2, -2, 0);
44191 assert_eq_m128i(r, e);
44192 }
44193
44194 #[simd_test(enable = "avx512f")]
44195 unsafe fn test_mm512_mul_ps() {
44196 #[rustfmt::skip]
44197 let a = _mm512_setr_ps(
44198 0., 1., -1., f32::MAX,
44199 f32::MIN, 100., -100., -32.,
44200 0., 1., -1., f32::MAX,
44201 f32::MIN, 100., -100., -32.,
44202 );
44203 let b = _mm512_set1_ps(2.);
44204 let r = _mm512_mul_ps(a, b);
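        // f32::MAX * 2. overflows to INFINITY and f32::MIN * 2. to NEG_INFINITY.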
44205 #[rustfmt::skip]
44206 let e = _mm512_setr_ps(
44207 0., 2., -2., f32::INFINITY,
44208 f32::NEG_INFINITY, 200., -200., -64.,
44209 0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
44212 );
44213 assert_eq_m512(r, e);
44214 }
44215
44216 #[simd_test(enable = "avx512f")]
44217 unsafe fn test_mm512_mask_mul_ps() {
44218 #[rustfmt::skip]
44219 let a = _mm512_setr_ps(
44220 0., 1., -1., f32::MAX,
44221 f32::MIN, 100., -100., -32.,
44222 0., 1., -1., f32::MAX,
44223 f32::MIN, 100., -100., -32.,
44224 );
44225 let b = _mm512_set1_ps(2.);
44226 let r = _mm512_mask_mul_ps(a, 0, a, b);
44227 assert_eq_m512(r, a);
44228 let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
44229 #[rustfmt::skip]
44230 let e = _mm512_setr_ps(
44231 0., 2., -2., f32::INFINITY,
44232 f32::NEG_INFINITY, 200., -200., -64.,
44233 0., 1., -1., f32::MAX,
44234 f32::MIN, 100., -100., -32.,
44235 );
44236 assert_eq_m512(r, e);
44237 }
44238
44239 #[simd_test(enable = "avx512f")]
44240 unsafe fn test_mm512_maskz_mul_ps() {
44241 #[rustfmt::skip]
44242 let a = _mm512_setr_ps(
44243 0., 1., -1., f32::MAX,
44244 f32::MIN, 100., -100., -32.,
44245 0., 1., -1., f32::MAX,
44246 f32::MIN, 100., -100., -32.,
44247 );
44248 let b = _mm512_set1_ps(2.);
44249 let r = _mm512_maskz_mul_ps(0, a, b);
44250 assert_eq_m512(r, _mm512_setzero_ps());
44251 let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
44252 #[rustfmt::skip]
44253 let e = _mm512_setr_ps(
44254 0., 2., -2., f32::INFINITY,
44255 f32::NEG_INFINITY, 200., -200., -64.,
44256 0., 0., 0., 0.,
44257 0., 0., 0., 0.,
44258 );
44259 assert_eq_m512(r, e);
44260 }
44261
44262 #[simd_test(enable = "avx512f,avx512vl")]
44263 unsafe fn test_mm256_mask_mul_ps() {
44264 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44265 let b = _mm256_set1_ps(2.);
44266 let r = _mm256_mask_mul_ps(a, 0, a, b);
44267 assert_eq_m256(r, a);
44268 let r = _mm256_mask_mul_ps(a, 0b11111111, a, b);
44269 #[rustfmt::skip]
44270 let e = _mm256_set_ps(
44271 0., 2., -2., f32::INFINITY,
44272 f32::NEG_INFINITY, 200., -200., -64.,
44273 );
44274 assert_eq_m256(r, e);
44275 }
44276
44277 #[simd_test(enable = "avx512f,avx512vl")]
44278 unsafe fn test_mm256_maskz_mul_ps() {
44279 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44280 let b = _mm256_set1_ps(2.);
44281 let r = _mm256_maskz_mul_ps(0, a, b);
44282 assert_eq_m256(r, _mm256_setzero_ps());
44283 let r = _mm256_maskz_mul_ps(0b11111111, a, b);
44284 #[rustfmt::skip]
44285 let e = _mm256_set_ps(
44286 0., 2., -2., f32::INFINITY,
44287 f32::NEG_INFINITY, 200., -200., -64.,
44288 );
44289 assert_eq_m256(r, e);
44290 }
44291
44292 #[simd_test(enable = "avx512f,avx512vl")]
44293 unsafe fn test_mm_mask_mul_ps() {
44294 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44295 let b = _mm_set1_ps(2.);
44296 let r = _mm_mask_mul_ps(a, 0, a, b);
44297 assert_eq_m128(r, a);
44298 let r = _mm_mask_mul_ps(a, 0b00001111, a, b);
44299 let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
44300 assert_eq_m128(r, e);
44301 }
44302
44303 #[simd_test(enable = "avx512f,avx512vl")]
44304 unsafe fn test_mm_maskz_mul_ps() {
44305 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44306 let b = _mm_set1_ps(2.);
44307 let r = _mm_maskz_mul_ps(0, a, b);
44308 assert_eq_m128(r, _mm_setzero_ps());
44309 let r = _mm_maskz_mul_ps(0b00001111, a, b);
44310 let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
44311 assert_eq_m128(r, e);
44312 }
44313
44314 #[simd_test(enable = "avx512f")]
44315 unsafe fn test_mm512_div_ps() {
44316 let a = _mm512_setr_ps(
44317 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44318 );
44319 let b = _mm512_setr_ps(
44320 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44321 );
44322 let r = _mm512_div_ps(a, b);
44323 #[rustfmt::skip]
44324 let e = _mm512_setr_ps(
44325 0., 0.5, -0.5, -1.,
44326 50., f32::INFINITY, -50., -16.,
44327 0., 0.5, -0.5, 500.,
44328 f32::NEG_INFINITY, 50., -50., -16.,
44329 );
        assert_eq_m512(r, e); // 100./0. == INFINITY and -131./0. == NEG_INFINITY
44331 }
44332
44333 #[simd_test(enable = "avx512f")]
44334 unsafe fn test_mm512_mask_div_ps() {
44335 let a = _mm512_setr_ps(
44336 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44337 );
44338 let b = _mm512_setr_ps(
44339 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44340 );
44341 let r = _mm512_mask_div_ps(a, 0, a, b);
44342 assert_eq_m512(r, a);
44343 let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
44344 #[rustfmt::skip]
44345 let e = _mm512_setr_ps(
44346 0., 0.5, -0.5, -1.,
44347 50., f32::INFINITY, -50., -16.,
44348 0., 1., -1., 1000.,
44349 -131., 100., -100., -32.,
44350 );
44351 assert_eq_m512(r, e);
44352 }
44353
44354 #[simd_test(enable = "avx512f")]
44355 unsafe fn test_mm512_maskz_div_ps() {
44356 let a = _mm512_setr_ps(
44357 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44358 );
44359 let b = _mm512_setr_ps(
44360 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44361 );
44362 let r = _mm512_maskz_div_ps(0, a, b);
44363 assert_eq_m512(r, _mm512_setzero_ps());
44364 let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
44365 #[rustfmt::skip]
44366 let e = _mm512_setr_ps(
44367 0., 0.5, -0.5, -1.,
44368 50., f32::INFINITY, -50., -16.,
44369 0., 0., 0., 0.,
44370 0., 0., 0., 0.,
44371 );
44372 assert_eq_m512(r, e);
44373 }
44374
44375 #[simd_test(enable = "avx512f,avx512vl")]
44376 unsafe fn test_mm256_mask_div_ps() {
44377 let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
44378 let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
44379 let r = _mm256_mask_div_ps(a, 0, a, b);
44380 assert_eq_m256(r, a);
44381 let r = _mm256_mask_div_ps(a, 0b11111111, a, b);
44382 let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
44383 assert_eq_m256(r, e);
44384 }
44385
44386 #[simd_test(enable = "avx512f,avx512vl")]
44387 unsafe fn test_mm256_maskz_div_ps() {
44388 let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
44389 let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
44390 let r = _mm256_maskz_div_ps(0, a, b);
44391 assert_eq_m256(r, _mm256_setzero_ps());
44392 let r = _mm256_maskz_div_ps(0b11111111, a, b);
44393 let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
44394 assert_eq_m256(r, e);
44395 }
44396
44397 #[simd_test(enable = "avx512f,avx512vl")]
44398 unsafe fn test_mm_mask_div_ps() {
44399 let a = _mm_set_ps(100., 100., -100., -32.);
44400 let b = _mm_set_ps(2., 0., 2., 2.);
44401 let r = _mm_mask_div_ps(a, 0, a, b);
44402 assert_eq_m128(r, a);
44403 let r = _mm_mask_div_ps(a, 0b00001111, a, b);
44404 let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
44405 assert_eq_m128(r, e);
44406 }
44407
44408 #[simd_test(enable = "avx512f,avx512vl")]
44409 unsafe fn test_mm_maskz_div_ps() {
44410 let a = _mm_set_ps(100., 100., -100., -32.);
44411 let b = _mm_set_ps(2., 0., 2., 2.);
44412 let r = _mm_maskz_div_ps(0, a, b);
44413 assert_eq_m128(r, _mm_setzero_ps());
44414 let r = _mm_maskz_div_ps(0b00001111, a, b);
44415 let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
44416 assert_eq_m128(r, e);
44417 }
44418
44419 #[simd_test(enable = "avx512f")]
44420 unsafe fn test_mm512_max_epi32() {
44421 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44422 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44423 let r = _mm512_max_epi32(a, b);
44424 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44425 assert_eq_m512i(r, e);
44426 }
44427
44428 #[simd_test(enable = "avx512f")]
44429 unsafe fn test_mm512_mask_max_epi32() {
44430 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44431 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44432 let r = _mm512_mask_max_epi32(a, 0, a, b);
44433 assert_eq_m512i(r, a);
44434 let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b);
44435 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44436 assert_eq_m512i(r, e);
44437 }
44438
44439 #[simd_test(enable = "avx512f")]
44440 unsafe fn test_mm512_maskz_max_epi32() {
44441 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44442 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44443 let r = _mm512_maskz_max_epi32(0, a, b);
44444 assert_eq_m512i(r, _mm512_setzero_si512());
44445 let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b);
44446 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
44447 assert_eq_m512i(r, e);
44448 }
44449
44450 #[simd_test(enable = "avx512f,avx512vl")]
44451 unsafe fn test_mm256_mask_max_epi32() {
44452 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44453 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44454 let r = _mm256_mask_max_epi32(a, 0, a, b);
44455 assert_eq_m256i(r, a);
44456 let r = _mm256_mask_max_epi32(a, 0b11111111, a, b);
44457 let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44458 assert_eq_m256i(r, e);
44459 }
44460
44461 #[simd_test(enable = "avx512f,avx512vl")]
44462 unsafe fn test_mm256_maskz_max_epi32() {
44463 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44464 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44465 let r = _mm256_maskz_max_epi32(0, a, b);
44466 assert_eq_m256i(r, _mm256_setzero_si256());
44467 let r = _mm256_maskz_max_epi32(0b11111111, a, b);
44468 let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44469 assert_eq_m256i(r, e);
44470 }
44471
44472 #[simd_test(enable = "avx512f,avx512vl")]
44473 unsafe fn test_mm_mask_max_epi32() {
44474 let a = _mm_set_epi32(0, 1, 2, 3);
44475 let b = _mm_set_epi32(3, 2, 1, 0);
44476 let r = _mm_mask_max_epi32(a, 0, a, b);
44477 assert_eq_m128i(r, a);
44478 let r = _mm_mask_max_epi32(a, 0b00001111, a, b);
44479 let e = _mm_set_epi32(3, 2, 2, 3);
44480 assert_eq_m128i(r, e);
44481 }
44482
44483 #[simd_test(enable = "avx512f,avx512vl")]
44484 unsafe fn test_mm_maskz_max_epi32() {
44485 let a = _mm_set_epi32(0, 1, 2, 3);
44486 let b = _mm_set_epi32(3, 2, 1, 0);
44487 let r = _mm_maskz_max_epi32(0, a, b);
44488 assert_eq_m128i(r, _mm_setzero_si128());
44489 let r = _mm_maskz_max_epi32(0b00001111, a, b);
44490 let e = _mm_set_epi32(3, 2, 2, 3);
44491 assert_eq_m128i(r, e);
44492 }
44493
44494 #[simd_test(enable = "avx512f")]
44495 unsafe fn test_mm512_max_ps() {
44496 let a = _mm512_setr_ps(
44497 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44498 );
44499 let b = _mm512_setr_ps(
44500 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44501 );
44502 let r = _mm512_max_ps(a, b);
44503 let e = _mm512_setr_ps(
44504 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
44505 );
44506 assert_eq_m512(r, e);
44507 }
44508
44509 #[simd_test(enable = "avx512f")]
44510 unsafe fn test_mm512_mask_max_ps() {
44511 let a = _mm512_setr_ps(
44512 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44513 );
44514 let b = _mm512_setr_ps(
44515 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44516 );
44517 let r = _mm512_mask_max_ps(a, 0, a, b);
44518 assert_eq_m512(r, a);
44519 let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b);
44520 let e = _mm512_setr_ps(
44521 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
44522 );
44523 assert_eq_m512(r, e);
44524 }
44525
44526 #[simd_test(enable = "avx512f")]
44527 unsafe fn test_mm512_maskz_max_ps() {
44528 let a = _mm512_setr_ps(
44529 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44530 );
44531 let b = _mm512_setr_ps(
44532 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44533 );
44534 let r = _mm512_maskz_max_ps(0, a, b);
44535 assert_eq_m512(r, _mm512_setzero_ps());
44536 let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b);
44537 let e = _mm512_setr_ps(
44538 15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
44539 );
44540 assert_eq_m512(r, e);
44541 }
44542
44543 #[simd_test(enable = "avx512f,avx512vl")]
44544 unsafe fn test_mm256_mask_max_ps() {
44545 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44546 let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44547 let r = _mm256_mask_max_ps(a, 0, a, b);
44548 assert_eq_m256(r, a);
44549 let r = _mm256_mask_max_ps(a, 0b11111111, a, b);
44550 let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
44551 assert_eq_m256(r, e);
44552 }
44553
44554 #[simd_test(enable = "avx512f,avx512vl")]
44555 unsafe fn test_mm256_maskz_max_ps() {
44556 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44557 let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44558 let r = _mm256_maskz_max_ps(0, a, b);
44559 assert_eq_m256(r, _mm256_setzero_ps());
44560 let r = _mm256_maskz_max_ps(0b11111111, a, b);
44561 let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
44562 assert_eq_m256(r, e);
44563 }
44564
44565 #[simd_test(enable = "avx512f,avx512vl")]
44566 unsafe fn test_mm_mask_max_ps() {
44567 let a = _mm_set_ps(0., 1., 2., 3.);
44568 let b = _mm_set_ps(3., 2., 1., 0.);
44569 let r = _mm_mask_max_ps(a, 0, a, b);
44570 assert_eq_m128(r, a);
44571 let r = _mm_mask_max_ps(a, 0b00001111, a, b);
44572 let e = _mm_set_ps(3., 2., 2., 3.);
44573 assert_eq_m128(r, e);
44574 }
44575
44576 #[simd_test(enable = "avx512f,avx512vl")]
44577 unsafe fn test_mm_maskz_max_ps() {
44578 let a = _mm_set_ps(0., 1., 2., 3.);
44579 let b = _mm_set_ps(3., 2., 1., 0.);
44580 let r = _mm_maskz_max_ps(0, a, b);
44581 assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_max_ps(0b00001111, a, b);
44583 let e = _mm_set_ps(3., 2., 2., 3.);
44584 assert_eq_m128(r, e);
44585 }
44586
44587 #[simd_test(enable = "avx512f")]
44588 unsafe fn test_mm512_max_epu32() {
44589 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44590 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44591 let r = _mm512_max_epu32(a, b);
44592 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44593 assert_eq_m512i(r, e);
44594 }
44595
44596 #[simd_test(enable = "avx512f")]
44597 unsafe fn test_mm512_mask_max_epu32() {
44598 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44599 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44600 let r = _mm512_mask_max_epu32(a, 0, a, b);
44601 assert_eq_m512i(r, a);
44602 let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
44603 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44604 assert_eq_m512i(r, e);
44605 }
44606
44607 #[simd_test(enable = "avx512f")]
44608 unsafe fn test_mm512_maskz_max_epu32() {
44609 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44610 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44611 let r = _mm512_maskz_max_epu32(0, a, b);
44612 assert_eq_m512i(r, _mm512_setzero_si512());
44613 let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
44614 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
44615 assert_eq_m512i(r, e);
44616 }
44617
44618 #[simd_test(enable = "avx512f,avx512vl")]
44619 unsafe fn test_mm256_mask_max_epu32() {
44620 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44621 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44622 let r = _mm256_mask_max_epu32(a, 0, a, b);
44623 assert_eq_m256i(r, a);
44624 let r = _mm256_mask_max_epu32(a, 0b11111111, a, b);
44625 let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44626 assert_eq_m256i(r, e);
44627 }
44628
44629 #[simd_test(enable = "avx512f,avx512vl")]
44630 unsafe fn test_mm256_maskz_max_epu32() {
44631 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44632 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44633 let r = _mm256_maskz_max_epu32(0, a, b);
44634 assert_eq_m256i(r, _mm256_setzero_si256());
44635 let r = _mm256_maskz_max_epu32(0b11111111, a, b);
44636 let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44637 assert_eq_m256i(r, e);
44638 }
44639
44640 #[simd_test(enable = "avx512f,avx512vl")]
44641 unsafe fn test_mm_mask_max_epu32() {
44642 let a = _mm_set_epi32(0, 1, 2, 3);
44643 let b = _mm_set_epi32(3, 2, 1, 0);
44644 let r = _mm_mask_max_epu32(a, 0, a, b);
44645 assert_eq_m128i(r, a);
44646 let r = _mm_mask_max_epu32(a, 0b00001111, a, b);
44647 let e = _mm_set_epi32(3, 2, 2, 3);
44648 assert_eq_m128i(r, e);
44649 }
44650
44651 #[simd_test(enable = "avx512f,avx512vl")]
44652 unsafe fn test_mm_maskz_max_epu32() {
44653 let a = _mm_set_epi32(0, 1, 2, 3);
44654 let b = _mm_set_epi32(3, 2, 1, 0);
44655 let r = _mm_maskz_max_epu32(0, a, b);
44656 assert_eq_m128i(r, _mm_setzero_si128());
44657 let r = _mm_maskz_max_epu32(0b00001111, a, b);
44658 let e = _mm_set_epi32(3, 2, 2, 3);
44659 assert_eq_m128i(r, e);
44660 }
44661
44662 #[simd_test(enable = "avx512f")]
44663 unsafe fn test_mm512_min_epi32() {
44664 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44665 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44666 let r = _mm512_min_epi32(a, b);
44667 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
44668 assert_eq_m512i(r, e);
44669 }
44670
44671 #[simd_test(enable = "avx512f")]
44672 unsafe fn test_mm512_mask_min_epi32() {
44673 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44674 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44675 let r = _mm512_mask_min_epi32(a, 0, a, b);
44676 assert_eq_m512i(r, a);
44677 let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
44678 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44679 assert_eq_m512i(r, e);
44680 }
44681
44682 #[simd_test(enable = "avx512f")]
44683 unsafe fn test_mm512_maskz_min_epi32() {
44684 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44685 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44686 let r = _mm512_maskz_min_epi32(0, a, b);
44687 assert_eq_m512i(r, _mm512_setzero_si512());
44688 let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
44689 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
44690 assert_eq_m512i(r, e);
44691 }
44692
44693 #[simd_test(enable = "avx512f,avx512vl")]
44694 unsafe fn test_mm256_mask_min_epi32() {
44695 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44696 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44697 let r = _mm256_mask_min_epi32(a, 0, a, b);
44698 assert_eq_m256i(r, a);
44699 let r = _mm256_mask_min_epi32(a, 0b11111111, a, b);
44700 let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44701 assert_eq_m256i(r, e);
44702 }
44703
44704 #[simd_test(enable = "avx512f,avx512vl")]
44705 unsafe fn test_mm256_maskz_min_epi32() {
44706 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44707 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44708 let r = _mm256_maskz_min_epi32(0, a, b);
44709 assert_eq_m256i(r, _mm256_setzero_si256());
44710 let r = _mm256_maskz_min_epi32(0b11111111, a, b);
44711 let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44712 assert_eq_m256i(r, e);
44713 }
44714
44715 #[simd_test(enable = "avx512f,avx512vl")]
44716 unsafe fn test_mm_mask_min_epi32() {
44717 let a = _mm_set_epi32(0, 1, 2, 3);
44718 let b = _mm_set_epi32(3, 2, 1, 0);
44719 let r = _mm_mask_min_epi32(a, 0, a, b);
44720 assert_eq_m128i(r, a);
44721 let r = _mm_mask_min_epi32(a, 0b00001111, a, b);
44722 let e = _mm_set_epi32(0, 1, 1, 0);
44723 assert_eq_m128i(r, e);
44724 }
44725
44726 #[simd_test(enable = "avx512f,avx512vl")]
44727 unsafe fn test_mm_maskz_min_epi32() {
44728 let a = _mm_set_epi32(0, 1, 2, 3);
44729 let b = _mm_set_epi32(3, 2, 1, 0);
44730 let r = _mm_maskz_min_epi32(0, a, b);
44731 assert_eq_m128i(r, _mm_setzero_si128());
44732 let r = _mm_maskz_min_epi32(0b00001111, a, b);
44733 let e = _mm_set_epi32(0, 1, 1, 0);
44734 assert_eq_m128i(r, e);
44735 }
44736
44737 #[simd_test(enable = "avx512f")]
44738 unsafe fn test_mm512_min_ps() {
44739 let a = _mm512_setr_ps(
44740 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44741 );
44742 let b = _mm512_setr_ps(
44743 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44744 );
44745 let r = _mm512_min_ps(a, b);
44746 let e = _mm512_setr_ps(
44747 0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
44748 );
44749 assert_eq_m512(r, e);
44750 }
44751
44752 #[simd_test(enable = "avx512f")]
44753 unsafe fn test_mm512_mask_min_ps() {
44754 let a = _mm512_setr_ps(
44755 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44756 );
44757 let b = _mm512_setr_ps(
44758 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44759 );
44760 let r = _mm512_mask_min_ps(a, 0, a, b);
44761 assert_eq_m512(r, a);
44762 let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
44763 let e = _mm512_setr_ps(
44764 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44765 );
44766 assert_eq_m512(r, e);
44767 }
44768
44769 #[simd_test(enable = "avx512f")]
44770 unsafe fn test_mm512_maskz_min_ps() {
44771 let a = _mm512_setr_ps(
44772 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44773 );
44774 let b = _mm512_setr_ps(
44775 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44776 );
44777 let r = _mm512_maskz_min_ps(0, a, b);
44778 assert_eq_m512(r, _mm512_setzero_ps());
44779 let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
44780 let e = _mm512_setr_ps(
44781 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
44782 );
44783 assert_eq_m512(r, e);
44784 }
44785
44786 #[simd_test(enable = "avx512f,avx512vl")]
44787 unsafe fn test_mm256_mask_min_ps() {
44788 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44789 let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44790 let r = _mm256_mask_min_ps(a, 0, a, b);
44791 assert_eq_m256(r, a);
44792 let r = _mm256_mask_min_ps(a, 0b11111111, a, b);
44793 let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
44794 assert_eq_m256(r, e);
44795 }
44796
44797 #[simd_test(enable = "avx512f,avx512vl")]
44798 unsafe fn test_mm256_maskz_min_ps() {
44799 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44800 let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44801 let r = _mm256_maskz_min_ps(0, a, b);
44802 assert_eq_m256(r, _mm256_setzero_ps());
44803 let r = _mm256_maskz_min_ps(0b11111111, a, b);
44804 let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
44805 assert_eq_m256(r, e);
44806 }
44807
44808 #[simd_test(enable = "avx512f,avx512vl")]
44809 unsafe fn test_mm_mask_min_ps() {
44810 let a = _mm_set_ps(0., 1., 2., 3.);
44811 let b = _mm_set_ps(3., 2., 1., 0.);
44812 let r = _mm_mask_min_ps(a, 0, a, b);
44813 assert_eq_m128(r, a);
44814 let r = _mm_mask_min_ps(a, 0b00001111, a, b);
44815 let e = _mm_set_ps(0., 1., 1., 0.);
44816 assert_eq_m128(r, e);
44817 }
44818
44819 #[simd_test(enable = "avx512f,avx512vl")]
44820 unsafe fn test_mm_maskz_min_ps() {
44821 let a = _mm_set_ps(0., 1., 2., 3.);
44822 let b = _mm_set_ps(3., 2., 1., 0.);
44823 let r = _mm_maskz_min_ps(0, a, b);
44824 assert_eq_m128(r, _mm_setzero_ps());
44825 let r = _mm_maskz_min_ps(0b00001111, a, b);
44826 let e = _mm_set_ps(0., 1., 1., 0.);
44827 assert_eq_m128(r, e);
44828 }
44829
44830 #[simd_test(enable = "avx512f")]
44831 unsafe fn test_mm512_min_epu32() {
44832 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44833 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44834 let r = _mm512_min_epu32(a, b);
44835 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
44836 assert_eq_m512i(r, e);
44837 }
44838
44839 #[simd_test(enable = "avx512f")]
44840 unsafe fn test_mm512_mask_min_epu32() {
44841 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44842 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44843 let r = _mm512_mask_min_epu32(a, 0, a, b);
44844 assert_eq_m512i(r, a);
44845 let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
44846 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44847 assert_eq_m512i(r, e);
44848 }
44849
44850 #[simd_test(enable = "avx512f")]
44851 unsafe fn test_mm512_maskz_min_epu32() {
44852 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44853 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44854 let r = _mm512_maskz_min_epu32(0, a, b);
44855 assert_eq_m512i(r, _mm512_setzero_si512());
44856 let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
44857 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
44858 assert_eq_m512i(r, e);
44859 }
44860
44861 #[simd_test(enable = "avx512f,avx512vl")]
44862 unsafe fn test_mm256_mask_min_epu32() {
44863 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44864 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44865 let r = _mm256_mask_min_epu32(a, 0, a, b);
44866 assert_eq_m256i(r, a);
44867 let r = _mm256_mask_min_epu32(a, 0b11111111, a, b);
44868 let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44869 assert_eq_m256i(r, e);
44870 }
44871
44872 #[simd_test(enable = "avx512f,avx512vl")]
44873 unsafe fn test_mm256_maskz_min_epu32() {
44874 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44875 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44876 let r = _mm256_maskz_min_epu32(0, a, b);
44877 assert_eq_m256i(r, _mm256_setzero_si256());
44878 let r = _mm256_maskz_min_epu32(0b11111111, a, b);
44879 let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44880 assert_eq_m256i(r, e);
44881 }
44882
44883 #[simd_test(enable = "avx512f,avx512vl")]
44884 unsafe fn test_mm_mask_min_epu32() {
44885 let a = _mm_set_epi32(0, 1, 2, 3);
44886 let b = _mm_set_epi32(3, 2, 1, 0);
44887 let r = _mm_mask_min_epu32(a, 0, a, b);
44888 assert_eq_m128i(r, a);
44889 let r = _mm_mask_min_epu32(a, 0b00001111, a, b);
44890 let e = _mm_set_epi32(0, 1, 1, 0);
44891 assert_eq_m128i(r, e);
44892 }
44893
44894 #[simd_test(enable = "avx512f,avx512vl")]
44895 unsafe fn test_mm_maskz_min_epu32() {
44896 let a = _mm_set_epi32(0, 1, 2, 3);
44897 let b = _mm_set_epi32(3, 2, 1, 0);
44898 let r = _mm_maskz_min_epu32(0, a, b);
44899 assert_eq_m128i(r, _mm_setzero_si128());
44900 let r = _mm_maskz_min_epu32(0b00001111, a, b);
44901 let e = _mm_set_epi32(0, 1, 1, 0);
44902 assert_eq_m128i(r, e);
44903 }
44904
44905 #[simd_test(enable = "avx512f")]
44906 unsafe fn test_mm512_sqrt_ps() {
44907 let a = _mm512_setr_ps(
44908 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44909 );
44910 let r = _mm512_sqrt_ps(a);
44911 let e = _mm512_setr_ps(
44912 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44913 );
44914 assert_eq_m512(r, e);
44915 }
44916
44917 #[simd_test(enable = "avx512f")]
44918 unsafe fn test_mm512_mask_sqrt_ps() {
44919 let a = _mm512_setr_ps(
44920 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44921 );
44922 let r = _mm512_mask_sqrt_ps(a, 0, a);
44923 assert_eq_m512(r, a);
44924 let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
44925 let e = _mm512_setr_ps(
44926 0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
44927 );
44928 assert_eq_m512(r, e);
44929 }
44930
44931 #[simd_test(enable = "avx512f")]
44932 unsafe fn test_mm512_maskz_sqrt_ps() {
44933 let a = _mm512_setr_ps(
44934 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44935 );
44936 let r = _mm512_maskz_sqrt_ps(0, a);
44937 assert_eq_m512(r, _mm512_setzero_ps());
44938 let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
44939 let e = _mm512_setr_ps(
44940 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
44941 );
44942 assert_eq_m512(r, e);
44943 }
44944
44945 #[simd_test(enable = "avx512f,avx512vl")]
44946 unsafe fn test_mm256_mask_sqrt_ps() {
44947 let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
44948 let r = _mm256_mask_sqrt_ps(a, 0, a);
44949 assert_eq_m256(r, a);
44950 let r = _mm256_mask_sqrt_ps(a, 0b11111111, a);
44951 let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44952 assert_eq_m256(r, e);
44953 }
44954
44955 #[simd_test(enable = "avx512f,avx512vl")]
44956 unsafe fn test_mm256_maskz_sqrt_ps() {
44957 let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
44958 let r = _mm256_maskz_sqrt_ps(0, a);
44959 assert_eq_m256(r, _mm256_setzero_ps());
44960 let r = _mm256_maskz_sqrt_ps(0b11111111, a);
44961 let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44962 assert_eq_m256(r, e);
44963 }
44964
44965 #[simd_test(enable = "avx512f,avx512vl")]
44966 unsafe fn test_mm_mask_sqrt_ps() {
44967 let a = _mm_set_ps(0., 1., 4., 9.);
44968 let r = _mm_mask_sqrt_ps(a, 0, a);
44969 assert_eq_m128(r, a);
44970 let r = _mm_mask_sqrt_ps(a, 0b00001111, a);
44971 let e = _mm_set_ps(0., 1., 2., 3.);
44972 assert_eq_m128(r, e);
44973 }
44974
44975 #[simd_test(enable = "avx512f,avx512vl")]
44976 unsafe fn test_mm_maskz_sqrt_ps() {
44977 let a = _mm_set_ps(0., 1., 4., 9.);
44978 let r = _mm_maskz_sqrt_ps(0, a);
44979 assert_eq_m128(r, _mm_setzero_ps());
44980 let r = _mm_maskz_sqrt_ps(0b00001111, a);
44981 let e = _mm_set_ps(0., 1., 2., 3.);
44982 assert_eq_m128(r, e);
44983 }
44984
44985 #[simd_test(enable = "avx512f")]
44986 unsafe fn test_mm512_fmadd_ps() {
44987 let a = _mm512_set1_ps(1.);
44988 let b = _mm512_setr_ps(
44989 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44990 );
44991 let c = _mm512_set1_ps(1.);
44992 let r = _mm512_fmadd_ps(a, b, c);
44993 let e = _mm512_setr_ps(
44994 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
44995 );
44996 assert_eq_m512(r, e);
44997 }
44998
44999 #[simd_test(enable = "avx512f")]
45000 unsafe fn test_mm512_mask_fmadd_ps() {
45001 let a = _mm512_set1_ps(1.);
45002 let b = _mm512_setr_ps(
45003 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45004 );
45005 let c = _mm512_set1_ps(1.);
45006 let r = _mm512_mask_fmadd_ps(a, 0, b, c);
45007 assert_eq_m512(r, a);
45008 let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c);
45009 let e = _mm512_setr_ps(
45010 1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
45011 );
45012 assert_eq_m512(r, e);
45013 }
45014
45015 #[simd_test(enable = "avx512f")]
45016 unsafe fn test_mm512_maskz_fmadd_ps() {
45017 let a = _mm512_set1_ps(1.);
45018 let b = _mm512_setr_ps(
45019 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45020 );
45021 let c = _mm512_set1_ps(1.);
45022 let r = _mm512_maskz_fmadd_ps(0, a, b, c);
45023 assert_eq_m512(r, _mm512_setzero_ps());
45024 let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c);
45025 let e = _mm512_setr_ps(
45026 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
45027 );
45028 assert_eq_m512(r, e);
45029 }
45030
45031 #[simd_test(enable = "avx512f")]
45032 unsafe fn test_mm512_mask3_fmadd_ps() {
45033 let a = _mm512_set1_ps(1.);
45034 let b = _mm512_setr_ps(
45035 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45036 );
45037 let c = _mm512_set1_ps(2.);
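        // The mask3 variants take the writemask as the last argument and copy c (the addend)
        // into lanes whose mask bit is not set.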
45038 let r = _mm512_mask3_fmadd_ps(a, b, c, 0);
45039 assert_eq_m512(r, c);
45040 let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111);
45041 let e = _mm512_setr_ps(
45042 2., 3., 4., 5., 6., 7., 8., 9., 2., 2., 2., 2., 2., 2., 2., 2.,
45043 );
45044 assert_eq_m512(r, e);
45045 }
45046
45047 #[simd_test(enable = "avx512f,avx512vl")]
45048 unsafe fn test_mm256_mask_fmadd_ps() {
45049 let a = _mm256_set1_ps(1.);
45050 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45051 let c = _mm256_set1_ps(1.);
45052 let r = _mm256_mask_fmadd_ps(a, 0, b, c);
45053 assert_eq_m256(r, a);
45054 let r = _mm256_mask_fmadd_ps(a, 0b11111111, b, c);
45055 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
45056 assert_eq_m256(r, e);
45057 }
45058
45059 #[simd_test(enable = "avx512f,avx512vl")]
45060 unsafe fn test_mm256_maskz_fmadd_ps() {
45061 let a = _mm256_set1_ps(1.);
45062 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45063 let c = _mm256_set1_ps(1.);
45064 let r = _mm256_maskz_fmadd_ps(0, a, b, c);
45065 assert_eq_m256(r, _mm256_setzero_ps());
45066 let r = _mm256_maskz_fmadd_ps(0b11111111, a, b, c);
45067 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
45068 assert_eq_m256(r, e);
45069 }
45070
45071 #[simd_test(enable = "avx512f,avx512vl")]
45072 unsafe fn test_mm256_mask3_fmadd_ps() {
45073 let a = _mm256_set1_ps(1.);
45074 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45075 let c = _mm256_set1_ps(1.);
45076 let r = _mm256_mask3_fmadd_ps(a, b, c, 0);
45077 assert_eq_m256(r, c);
45078 let r = _mm256_mask3_fmadd_ps(a, b, c, 0b11111111);
45079 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
45080 assert_eq_m256(r, e);
45081 }
45082
45083 #[simd_test(enable = "avx512f,avx512vl")]
45084 unsafe fn test_mm_mask_fmadd_ps() {
45085 let a = _mm_set1_ps(1.);
45086 let b = _mm_set_ps(0., 1., 2., 3.);
45087 let c = _mm_set1_ps(1.);
45088 let r = _mm_mask_fmadd_ps(a, 0, b, c);
45089 assert_eq_m128(r, a);
45090 let r = _mm_mask_fmadd_ps(a, 0b00001111, b, c);
45091 let e = _mm_set_ps(1., 2., 3., 4.);
45092 assert_eq_m128(r, e);
45093 }
45094
45095 #[simd_test(enable = "avx512f,avx512vl")]
45096 unsafe fn test_mm_maskz_fmadd_ps() {
45097 let a = _mm_set1_ps(1.);
45098 let b = _mm_set_ps(0., 1., 2., 3.);
45099 let c = _mm_set1_ps(1.);
45100 let r = _mm_maskz_fmadd_ps(0, a, b, c);
45101 assert_eq_m128(r, _mm_setzero_ps());
45102 let r = _mm_maskz_fmadd_ps(0b00001111, a, b, c);
45103 let e = _mm_set_ps(1., 2., 3., 4.);
45104 assert_eq_m128(r, e);
45105 }
45106
45107 #[simd_test(enable = "avx512f,avx512vl")]
45108 unsafe fn test_mm_mask3_fmadd_ps() {
45109 let a = _mm_set1_ps(1.);
45110 let b = _mm_set_ps(0., 1., 2., 3.);
45111 let c = _mm_set1_ps(1.);
45112 let r = _mm_mask3_fmadd_ps(a, b, c, 0);
45113 assert_eq_m128(r, c);
45114 let r = _mm_mask3_fmadd_ps(a, b, c, 0b00001111);
45115 let e = _mm_set_ps(1., 2., 3., 4.);
45116 assert_eq_m128(r, e);
45117 }
45118
45119 #[simd_test(enable = "avx512f")]
45120 unsafe fn test_mm512_fmsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
45130 let r = _mm512_fmsub_ps(a, b, c);
45131 let e = _mm512_setr_ps(
45132 -1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
45133 );
45134 assert_eq_m512(r, e);
45135 }
45136
45137 #[simd_test(enable = "avx512f")]
45138 unsafe fn test_mm512_mask_fmsub_ps() {
45139 let a = _mm512_set1_ps(1.);
45140 let b = _mm512_setr_ps(
45141 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45142 );
45143 let c = _mm512_set1_ps(1.);
45144 let r = _mm512_mask_fmsub_ps(a, 0, b, c);
45145 assert_eq_m512(r, a);
45146 let r = _mm512_mask_fmsub_ps(a, 0b00000000_11111111, b, c);
45147 let e = _mm512_setr_ps(
45148 -1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
45149 );
45150 assert_eq_m512(r, e);
45151 }
45152
45153 #[simd_test(enable = "avx512f")]
45154 unsafe fn test_mm512_maskz_fmsub_ps() {
45155 let a = _mm512_set1_ps(1.);
45156 let b = _mm512_setr_ps(
45157 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45158 );
45159 let c = _mm512_set1_ps(1.);
45160 let r = _mm512_maskz_fmsub_ps(0, a, b, c);
45161 assert_eq_m512(r, _mm512_setzero_ps());
45162 let r = _mm512_maskz_fmsub_ps(0b00000000_11111111, a, b, c);
45163 let e = _mm512_setr_ps(
45164 -1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
45165 );
45166 assert_eq_m512(r, e);
45167 }
45168
45169 #[simd_test(enable = "avx512f")]
45170 unsafe fn test_mm512_mask3_fmsub_ps() {
45171 let a = _mm512_set1_ps(1.);
45172 let b = _mm512_setr_ps(
45173 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45174 );
45175 let c = _mm512_setr_ps(
45176 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45177 );
45178 let r = _mm512_mask3_fmsub_ps(a, b, c, 0);
45179 assert_eq_m512(r, c);
45180 let r = _mm512_mask3_fmsub_ps(a, b, c, 0b00000000_11111111);
45181 let e = _mm512_setr_ps(
45182 -1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
45183 );
45184 assert_eq_m512(r, e);
45185 }
45186
45187 #[simd_test(enable = "avx512f,avx512vl")]
45188 unsafe fn test_mm256_mask_fmsub_ps() {
45189 let a = _mm256_set1_ps(1.);
45190 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45191 let c = _mm256_set1_ps(1.);
45192 let r = _mm256_mask_fmsub_ps(a, 0, b, c);
45193 assert_eq_m256(r, a);
45194 let r = _mm256_mask_fmsub_ps(a, 0b11111111, b, c);
45195 let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45196 assert_eq_m256(r, e);
45197 }
45198
45199 #[simd_test(enable = "avx512f,avx512vl")]
45200 unsafe fn test_mm256_maskz_fmsub_ps() {
45201 let a = _mm256_set1_ps(1.);
45202 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45203 let c = _mm256_set1_ps(1.);
45204 let r = _mm256_maskz_fmsub_ps(0, a, b, c);
45205 assert_eq_m256(r, _mm256_setzero_ps());
45206 let r = _mm256_maskz_fmsub_ps(0b11111111, a, b, c);
45207 let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45208 assert_eq_m256(r, e);
45209 }
45210
45211 #[simd_test(enable = "avx512f,avx512vl")]
45212 unsafe fn test_mm256_mask3_fmsub_ps() {
45213 let a = _mm256_set1_ps(1.);
45214 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45215 let c = _mm256_set1_ps(1.);
45216 let r = _mm256_mask3_fmsub_ps(a, b, c, 0);
45217 assert_eq_m256(r, c);
45218 let r = _mm256_mask3_fmsub_ps(a, b, c, 0b11111111);
45219 let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45220 assert_eq_m256(r, e);
45221 }
45222
45223 #[simd_test(enable = "avx512f,avx512vl")]
45224 unsafe fn test_mm_mask_fmsub_ps() {
45225 let a = _mm_set1_ps(1.);
45226 let b = _mm_set_ps(0., 1., 2., 3.);
45227 let c = _mm_set1_ps(1.);
45228 let r = _mm_mask_fmsub_ps(a, 0, b, c);
45229 assert_eq_m128(r, a);
45230 let r = _mm_mask_fmsub_ps(a, 0b00001111, b, c);
45231 let e = _mm_set_ps(-1., 0., 1., 2.);
45232 assert_eq_m128(r, e);
45233 }
45234
45235 #[simd_test(enable = "avx512f,avx512vl")]
45236 unsafe fn test_mm_maskz_fmsub_ps() {
45237 let a = _mm_set1_ps(1.);
45238 let b = _mm_set_ps(0., 1., 2., 3.);
45239 let c = _mm_set1_ps(1.);
45240 let r = _mm_maskz_fmsub_ps(0, a, b, c);
45241 assert_eq_m128(r, _mm_setzero_ps());
45242 let r = _mm_maskz_fmsub_ps(0b00001111, a, b, c);
45243 let e = _mm_set_ps(-1., 0., 1., 2.);
45244 assert_eq_m128(r, e);
45245 }
45246
45247 #[simd_test(enable = "avx512f,avx512vl")]
45248 unsafe fn test_mm_mask3_fmsub_ps() {
45249 let a = _mm_set1_ps(1.);
45250 let b = _mm_set_ps(0., 1., 2., 3.);
45251 let c = _mm_set1_ps(1.);
45252 let r = _mm_mask3_fmsub_ps(a, b, c, 0);
45253 assert_eq_m128(r, c);
45254 let r = _mm_mask3_fmsub_ps(a, b, c, 0b00001111);
45255 let e = _mm_set_ps(-1., 0., 1., 2.);
45256 assert_eq_m128(r, e);
45257 }
45258
45259 #[simd_test(enable = "avx512f")]
45260 unsafe fn test_mm512_fmaddsub_ps() {
45261 let a = _mm512_set1_ps(1.);
45262 let b = _mm512_setr_ps(
45263 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45264 );
45265 let c = _mm512_set1_ps(1.);
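        // fmaddsub alternates the sign of the addend per lane: even-indexed lanes compute
        // a * b - c, odd-indexed lanes compute a * b + c.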
45266 let r = _mm512_fmaddsub_ps(a, b, c);
45267 let e = _mm512_setr_ps(
45268 -1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16.,
45269 );
45270 assert_eq_m512(r, e);
45271 }
45272
45273 #[simd_test(enable = "avx512f")]
45274 unsafe fn test_mm512_mask_fmaddsub_ps() {
45275 let a = _mm512_set1_ps(1.);
45276 let b = _mm512_setr_ps(
45277 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45278 );
45279 let c = _mm512_set1_ps(1.);
45280 let r = _mm512_mask_fmaddsub_ps(a, 0, b, c);
45281 assert_eq_m512(r, a);
45282 let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c);
45283 let e = _mm512_setr_ps(
45284 -1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
45285 );
45286 assert_eq_m512(r, e);
45287 }
45288
45289 #[simd_test(enable = "avx512f")]
45290 unsafe fn test_mm512_maskz_fmaddsub_ps() {
45291 let a = _mm512_set1_ps(1.);
45292 let b = _mm512_setr_ps(
45293 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45294 );
45295 let c = _mm512_set1_ps(1.);
45296 let r = _mm512_maskz_fmaddsub_ps(0, a, b, c);
45297 assert_eq_m512(r, _mm512_setzero_ps());
45298 let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c);
45299 let e = _mm512_setr_ps(
45300 -1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
45301 );
45302 assert_eq_m512(r, e);
45303 }
45304
45305 #[simd_test(enable = "avx512f")]
45306 unsafe fn test_mm512_mask3_fmaddsub_ps() {
45307 let a = _mm512_set1_ps(1.);
45308 let b = _mm512_setr_ps(
45309 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45310 );
45311 let c = _mm512_setr_ps(
45312 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45313 );
45314 let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0);
45315 assert_eq_m512(r, c);
45316 let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111);
45317 let e = _mm512_setr_ps(
45318 -1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
45319 );
45320 assert_eq_m512(r, e);
45321 }
45322
45323 #[simd_test(enable = "avx512f,avx512vl")]
45324 unsafe fn test_mm256_mask_fmaddsub_ps() {
45325 let a = _mm256_set1_ps(1.);
45326 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45327 let c = _mm256_set1_ps(1.);
45328 let r = _mm256_mask_fmaddsub_ps(a, 0, b, c);
45329 assert_eq_m256(r, a);
45330 let r = _mm256_mask_fmaddsub_ps(a, 0b11111111, b, c);
45331 let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45332 assert_eq_m256(r, e);
45333 }
45334
45335 #[simd_test(enable = "avx512f,avx512vl")]
45336 unsafe fn test_mm256_maskz_fmaddsub_ps() {
45337 let a = _mm256_set1_ps(1.);
45338 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45339 let c = _mm256_set1_ps(1.);
45340 let r = _mm256_maskz_fmaddsub_ps(0, a, b, c);
45341 assert_eq_m256(r, _mm256_setzero_ps());
45342 let r = _mm256_maskz_fmaddsub_ps(0b11111111, a, b, c);
45343 let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45344 assert_eq_m256(r, e);
45345 }
45346
45347 #[simd_test(enable = "avx512f,avx512vl")]
45348 unsafe fn test_mm256_mask3_fmaddsub_ps() {
45349 let a = _mm256_set1_ps(1.);
45350 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45351 let c = _mm256_set1_ps(1.);
45352 let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0);
45353 assert_eq_m256(r, c);
45354 let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0b11111111);
45355 let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45356 assert_eq_m256(r, e);
45357 }
45358
45359 #[simd_test(enable = "avx512f,avx512vl")]
45360 unsafe fn test_mm_mask_fmaddsub_ps() {
45361 let a = _mm_set1_ps(1.);
45362 let b = _mm_set_ps(0., 1., 2., 3.);
45363 let c = _mm_set1_ps(1.);
45364 let r = _mm_mask_fmaddsub_ps(a, 0, b, c);
45365 assert_eq_m128(r, a);
45366 let r = _mm_mask_fmaddsub_ps(a, 0b00001111, b, c);
45367 let e = _mm_set_ps(1., 0., 3., 2.);
45368 assert_eq_m128(r, e);
45369 }
45370
45371 #[simd_test(enable = "avx512f,avx512vl")]
45372 unsafe fn test_mm_maskz_fmaddsub_ps() {
45373 let a = _mm_set1_ps(1.);
45374 let b = _mm_set_ps(0., 1., 2., 3.);
45375 let c = _mm_set1_ps(1.);
45376 let r = _mm_maskz_fmaddsub_ps(0, a, b, c);
45377 assert_eq_m128(r, _mm_setzero_ps());
45378 let r = _mm_maskz_fmaddsub_ps(0b00001111, a, b, c);
45379 let e = _mm_set_ps(1., 0., 3., 2.);
45380 assert_eq_m128(r, e);
45381 }
45382
45383 #[simd_test(enable = "avx512f,avx512vl")]
45384 unsafe fn test_mm_mask3_fmaddsub_ps() {
45385 let a = _mm_set1_ps(1.);
45386 let b = _mm_set_ps(0., 1., 2., 3.);
45387 let c = _mm_set1_ps(1.);
45388 let r = _mm_mask3_fmaddsub_ps(a, b, c, 0);
45389 assert_eq_m128(r, c);
45390 let r = _mm_mask3_fmaddsub_ps(a, b, c, 0b00001111);
45391 let e = _mm_set_ps(1., 0., 3., 2.);
45392 assert_eq_m128(r, e);
45393 }
45394
45395 #[simd_test(enable = "avx512f")]
45396 unsafe fn test_mm512_fmsubadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
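        // fmsubadd is the mirror of fmaddsub: even-indexed lanes compute a * b + c,
        // odd-indexed lanes compute a * b - c.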
45406 let r = _mm512_fmsubadd_ps(a, b, c);
45407 let e = _mm512_setr_ps(
45408 1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14.,
45409 );
45410 assert_eq_m512(r, e);
45411 }
45412
45413 #[simd_test(enable = "avx512f")]
45414 unsafe fn test_mm512_mask_fmsubadd_ps() {
45415 let a = _mm512_set1_ps(1.);
45416 let b = _mm512_setr_ps(
45417 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45418 );
45419 let c = _mm512_set1_ps(1.);
45420 let r = _mm512_mask_fmsubadd_ps(a, 0, b, c);
45421 assert_eq_m512(r, a);
45422 let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c);
45423 let e = _mm512_setr_ps(
45424 1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
45425 );
45426 assert_eq_m512(r, e);
45427 }
45428
45429 #[simd_test(enable = "avx512f")]
45430 unsafe fn test_mm512_maskz_fmsubadd_ps() {
45431 let a = _mm512_set1_ps(1.);
45432 let b = _mm512_setr_ps(
45433 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45434 );
45435 let c = _mm512_set1_ps(1.);
45436 let r = _mm512_maskz_fmsubadd_ps(0, a, b, c);
45437 assert_eq_m512(r, _mm512_setzero_ps());
45438 let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c);
45439 let e = _mm512_setr_ps(
45440 1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
45441 );
45442 assert_eq_m512(r, e);
45443 }
45444
45445 #[simd_test(enable = "avx512f")]
45446 unsafe fn test_mm512_mask3_fmsubadd_ps() {
45447 let a = _mm512_set1_ps(1.);
45448 let b = _mm512_setr_ps(
45449 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45450 );
45451 let c = _mm512_setr_ps(
45452 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45453 );
45454 let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0);
45455 assert_eq_m512(r, c);
45456 let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b00000000_11111111);
45457 let e = _mm512_setr_ps(
45458 1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
45459 );
45460 assert_eq_m512(r, e);
45461 }
45462
45463 #[simd_test(enable = "avx512f,avx512vl")]
45464 unsafe fn test_mm256_mask_fmsubadd_ps() {
45465 let a = _mm256_set1_ps(1.);
45466 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45467 let c = _mm256_set1_ps(1.);
45468 let r = _mm256_mask_fmsubadd_ps(a, 0, b, c);
45469 assert_eq_m256(r, a);
45470 let r = _mm256_mask_fmsubadd_ps(a, 0b11111111, b, c);
45471 let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45472 assert_eq_m256(r, e);
45473 }
45474
45475 #[simd_test(enable = "avx512f,avx512vl")]
45476 unsafe fn test_mm256_maskz_fmsubadd_ps() {
45477 let a = _mm256_set1_ps(1.);
45478 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45479 let c = _mm256_set1_ps(1.);
45480 let r = _mm256_maskz_fmsubadd_ps(0, a, b, c);
45481 assert_eq_m256(r, _mm256_setzero_ps());
45482 let r = _mm256_maskz_fmsubadd_ps(0b11111111, a, b, c);
45483 let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45484 assert_eq_m256(r, e);
45485 }
45486
45487 #[simd_test(enable = "avx512f,avx512vl")]
45488 unsafe fn test_mm256_mask3_fmsubadd_ps() {
45489 let a = _mm256_set1_ps(1.);
45490 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45491 let c = _mm256_set1_ps(1.);
45492 let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0);
45493 assert_eq_m256(r, c);
45494 let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0b11111111);
45495 let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45496 assert_eq_m256(r, e);
45497 }
45498
45499 #[simd_test(enable = "avx512f,avx512vl")]
45500 unsafe fn test_mm_mask_fmsubadd_ps() {
45501 let a = _mm_set1_ps(1.);
45502 let b = _mm_set_ps(0., 1., 2., 3.);
45503 let c = _mm_set1_ps(1.);
45504 let r = _mm_mask_fmsubadd_ps(a, 0, b, c);
45505 assert_eq_m128(r, a);
45506 let r = _mm_mask_fmsubadd_ps(a, 0b00001111, b, c);
45507 let e = _mm_set_ps(-1., 2., 1., 4.);
45508 assert_eq_m128(r, e);
45509 }
45510
45511 #[simd_test(enable = "avx512f,avx512vl")]
45512 unsafe fn test_mm_maskz_fmsubadd_ps() {
45513 let a = _mm_set1_ps(1.);
45514 let b = _mm_set_ps(0., 1., 2., 3.);
45515 let c = _mm_set1_ps(1.);
45516 let r = _mm_maskz_fmsubadd_ps(0, a, b, c);
45517 assert_eq_m128(r, _mm_setzero_ps());
45518 let r = _mm_maskz_fmsubadd_ps(0b00001111, a, b, c);
45519 let e = _mm_set_ps(-1., 2., 1., 4.);
45520 assert_eq_m128(r, e);
45521 }
45522
45523 #[simd_test(enable = "avx512f,avx512vl")]
45524 unsafe fn test_mm_mask3_fmsubadd_ps() {
45525 let a = _mm_set1_ps(1.);
45526 let b = _mm_set_ps(0., 1., 2., 3.);
45527 let c = _mm_set1_ps(1.);
45528 let r = _mm_mask3_fmsubadd_ps(a, b, c, 0);
45529 assert_eq_m128(r, c);
45530 let r = _mm_mask3_fmsubadd_ps(a, b, c, 0b00001111);
45531 let e = _mm_set_ps(-1., 2., 1., 4.);
45532 assert_eq_m128(r, e);
45533 }
45534
45535 #[simd_test(enable = "avx512f")]
45536 unsafe fn test_mm512_fnmadd_ps() {
45537 let a = _mm512_set1_ps(1.);
45538 let b = _mm512_setr_ps(
45539 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45540 );
45541 let c = _mm512_set1_ps(1.);
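        // fnmadd negates the product, not the addend: each lane computes -(a * b) + c.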
45542 let r = _mm512_fnmadd_ps(a, b, c);
45543 let e = _mm512_setr_ps(
45544 1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14.,
45545 );
45546 assert_eq_m512(r, e);
45547 }
45548
45549 #[simd_test(enable = "avx512f")]
45550 unsafe fn test_mm512_mask_fnmadd_ps() {
45551 let a = _mm512_set1_ps(1.);
45552 let b = _mm512_setr_ps(
45553 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45554 );
45555 let c = _mm512_set1_ps(1.);
45556 let r = _mm512_mask_fnmadd_ps(a, 0, b, c);
45557 assert_eq_m512(r, a);
45558 let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c);
45559 let e = _mm512_setr_ps(
45560 1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1.,
45561 );
45562 assert_eq_m512(r, e);
45563 }
45564
45565 #[simd_test(enable = "avx512f")]
45566 unsafe fn test_mm512_maskz_fnmadd_ps() {
45567 let a = _mm512_set1_ps(1.);
45568 let b = _mm512_setr_ps(
45569 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45570 );
45571 let c = _mm512_set1_ps(1.);
45572 let r = _mm512_maskz_fnmadd_ps(0, a, b, c);
45573 assert_eq_m512(r, _mm512_setzero_ps());
45574 let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c);
45575 let e = _mm512_setr_ps(
45576 1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0.,
45577 );
45578 assert_eq_m512(r, e);
45579 }
45580
45581 #[simd_test(enable = "avx512f")]
45582 unsafe fn test_mm512_mask3_fnmadd_ps() {
45583 let a = _mm512_set1_ps(1.);
45584 let b = _mm512_setr_ps(
45585 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45586 );
45587 let c = _mm512_setr_ps(
45588 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45589 );
45590 let r = _mm512_mask3_fnmadd_ps(a, b, c, 0);
45591 assert_eq_m512(r, c);
45592 let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111);
45593 let e = _mm512_setr_ps(
45594 1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2.,
45595 );
45596 assert_eq_m512(r, e);
45597 }
45598
45599 #[simd_test(enable = "avx512f,avx512vl")]
45600 unsafe fn test_mm256_mask_fnmadd_ps() {
45601 let a = _mm256_set1_ps(1.);
45602 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45603 let c = _mm256_set1_ps(1.);
45604 let r = _mm256_mask_fnmadd_ps(a, 0, b, c);
45605 assert_eq_m256(r, a);
45606 let r = _mm256_mask_fnmadd_ps(a, 0b11111111, b, c);
45607 let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45608 assert_eq_m256(r, e);
45609 }
45610
45611 #[simd_test(enable = "avx512f,avx512vl")]
45612 unsafe fn test_mm256_maskz_fnmadd_ps() {
45613 let a = _mm256_set1_ps(1.);
45614 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45615 let c = _mm256_set1_ps(1.);
45616 let r = _mm256_maskz_fnmadd_ps(0, a, b, c);
45617 assert_eq_m256(r, _mm256_setzero_ps());
45618 let r = _mm256_maskz_fnmadd_ps(0b11111111, a, b, c);
45619 let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45620 assert_eq_m256(r, e);
45621 }
45622
45623 #[simd_test(enable = "avx512f,avx512vl")]
45624 unsafe fn test_mm256_mask3_fnmadd_ps() {
45625 let a = _mm256_set1_ps(1.);
45626 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45627 let c = _mm256_set1_ps(1.);
45628 let r = _mm256_mask3_fnmadd_ps(a, b, c, 0);
45629 assert_eq_m256(r, c);
45630 let r = _mm256_mask3_fnmadd_ps(a, b, c, 0b11111111);
45631 let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45632 assert_eq_m256(r, e);
45633 }
45634
45635 #[simd_test(enable = "avx512f,avx512vl")]
45636 unsafe fn test_mm_mask_fnmadd_ps() {
45637 let a = _mm_set1_ps(1.);
45638 let b = _mm_set_ps(0., 1., 2., 3.);
45639 let c = _mm_set1_ps(1.);
45640 let r = _mm_mask_fnmadd_ps(a, 0, b, c);
45641 assert_eq_m128(r, a);
45642 let r = _mm_mask_fnmadd_ps(a, 0b00001111, b, c);
45643 let e = _mm_set_ps(1., 0., -1., -2.);
45644 assert_eq_m128(r, e);
45645 }
45646
45647 #[simd_test(enable = "avx512f,avx512vl")]
45648 unsafe fn test_mm_maskz_fnmadd_ps() {
45649 let a = _mm_set1_ps(1.);
45650 let b = _mm_set_ps(0., 1., 2., 3.);
45651 let c = _mm_set1_ps(1.);
45652 let r = _mm_maskz_fnmadd_ps(0, a, b, c);
45653 assert_eq_m128(r, _mm_setzero_ps());
45654 let r = _mm_maskz_fnmadd_ps(0b00001111, a, b, c);
45655 let e = _mm_set_ps(1., 0., -1., -2.);
45656 assert_eq_m128(r, e);
45657 }
45658
45659 #[simd_test(enable = "avx512f,avx512vl")]
45660 unsafe fn test_mm_mask3_fnmadd_ps() {
45661 let a = _mm_set1_ps(1.);
45662 let b = _mm_set_ps(0., 1., 2., 3.);
45663 let c = _mm_set1_ps(1.);
45664 let r = _mm_mask3_fnmadd_ps(a, b, c, 0);
45665 assert_eq_m128(r, c);
45666 let r = _mm_mask3_fnmadd_ps(a, b, c, 0b00001111);
45667 let e = _mm_set_ps(1., 0., -1., -2.);
45668 assert_eq_m128(r, e);
45669 }
45670
45671 #[simd_test(enable = "avx512f")]
45672 unsafe fn test_mm512_fnmsub_ps() {
45673 let a = _mm512_set1_ps(1.);
45674 let b = _mm512_setr_ps(
45675 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45676 );
45677 let c = _mm512_set1_ps(1.);
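        // fnmsub negates the product and subtracts the addend: each lane computes -(a * b) - c.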
45678 let r = _mm512_fnmsub_ps(a, b, c);
45679 let e = _mm512_setr_ps(
45680 -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16.,
45681 );
45682 assert_eq_m512(r, e);
45683 }
45684
45685 #[simd_test(enable = "avx512f")]
45686 unsafe fn test_mm512_mask_fnmsub_ps() {
45687 let a = _mm512_set1_ps(1.);
45688 let b = _mm512_setr_ps(
45689 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45690 );
45691 let c = _mm512_set1_ps(1.);
45692 let r = _mm512_mask_fnmsub_ps(a, 0, b, c);
45693 assert_eq_m512(r, a);
45694 let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c);
45695 let e = _mm512_setr_ps(
45696 -1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1.,
45697 );
45698 assert_eq_m512(r, e);
45699 }
45700
45701 #[simd_test(enable = "avx512f")]
45702 unsafe fn test_mm512_maskz_fnmsub_ps() {
45703 let a = _mm512_set1_ps(1.);
45704 let b = _mm512_setr_ps(
45705 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45706 );
45707 let c = _mm512_set1_ps(1.);
45708 let r = _mm512_maskz_fnmsub_ps(0, a, b, c);
45709 assert_eq_m512(r, _mm512_setzero_ps());
45710 let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c);
45711 let e = _mm512_setr_ps(
45712 -1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
45713 );
45714 assert_eq_m512(r, e);
45715 }
45716
45717 #[simd_test(enable = "avx512f")]
45718 unsafe fn test_mm512_mask3_fnmsub_ps() {
45719 let a = _mm512_set1_ps(1.);
45720 let b = _mm512_setr_ps(
45721 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45722 );
45723 let c = _mm512_setr_ps(
45724 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45725 );
45726 let r = _mm512_mask3_fnmsub_ps(a, b, c, 0);
45727 assert_eq_m512(r, c);
45728 let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111);
45729 let e = _mm512_setr_ps(
45730 -1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2.,
45731 );
45732 assert_eq_m512(r, e);
45733 }
45734
45735 #[simd_test(enable = "avx512f,avx512vl")]
45736 unsafe fn test_mm256_mask_fnmsub_ps() {
45737 let a = _mm256_set1_ps(1.);
45738 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45739 let c = _mm256_set1_ps(1.);
45740 let r = _mm256_mask_fnmsub_ps(a, 0, b, c);
45741 assert_eq_m256(r, a);
45742 let r = _mm256_mask_fnmsub_ps(a, 0b11111111, b, c);
45743 let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45744 assert_eq_m256(r, e);
45745 }
45746
45747 #[simd_test(enable = "avx512f,avx512vl")]
45748 unsafe fn test_mm256_maskz_fnmsub_ps() {
45749 let a = _mm256_set1_ps(1.);
45750 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45751 let c = _mm256_set1_ps(1.);
45752 let r = _mm256_maskz_fnmsub_ps(0, a, b, c);
45753 assert_eq_m256(r, _mm256_setzero_ps());
45754 let r = _mm256_maskz_fnmsub_ps(0b11111111, a, b, c);
45755 let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45756 assert_eq_m256(r, e);
45757 }
45758
45759 #[simd_test(enable = "avx512f,avx512vl")]
45760 unsafe fn test_mm256_mask3_fnmsub_ps() {
45761 let a = _mm256_set1_ps(1.);
45762 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45763 let c = _mm256_set1_ps(1.);
45764 let r = _mm256_mask3_fnmsub_ps(a, b, c, 0);
45765 assert_eq_m256(r, c);
45766 let r = _mm256_mask3_fnmsub_ps(a, b, c, 0b11111111);
45767 let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45768 assert_eq_m256(r, e);
45769 }
45770
45771 #[simd_test(enable = "avx512f,avx512vl")]
45772 unsafe fn test_mm_mask_fnmsub_ps() {
45773 let a = _mm_set1_ps(1.);
45774 let b = _mm_set_ps(0., 1., 2., 3.);
45775 let c = _mm_set1_ps(1.);
45776 let r = _mm_mask_fnmsub_ps(a, 0, b, c);
45777 assert_eq_m128(r, a);
45778 let r = _mm_mask_fnmsub_ps(a, 0b00001111, b, c);
45779 let e = _mm_set_ps(-1., -2., -3., -4.);
45780 assert_eq_m128(r, e);
45781 }
45782
45783 #[simd_test(enable = "avx512f,avx512vl")]
45784 unsafe fn test_mm_maskz_fnmsub_ps() {
45785 let a = _mm_set1_ps(1.);
45786 let b = _mm_set_ps(0., 1., 2., 3.);
45787 let c = _mm_set1_ps(1.);
45788 let r = _mm_maskz_fnmsub_ps(0, a, b, c);
45789 assert_eq_m128(r, _mm_setzero_ps());
45790 let r = _mm_maskz_fnmsub_ps(0b00001111, a, b, c);
45791 let e = _mm_set_ps(-1., -2., -3., -4.);
45792 assert_eq_m128(r, e);
45793 }
45794
45795 #[simd_test(enable = "avx512f,avx512vl")]
45796 unsafe fn test_mm_mask3_fnmsub_ps() {
45797 let a = _mm_set1_ps(1.);
45798 let b = _mm_set_ps(0., 1., 2., 3.);
45799 let c = _mm_set1_ps(1.);
45800 let r = _mm_mask3_fnmsub_ps(a, b, c, 0);
45801 assert_eq_m128(r, c);
45802 let r = _mm_mask3_fnmsub_ps(a, b, c, 0b00001111);
45803 let e = _mm_set_ps(-1., -2., -3., -4.);
45804 assert_eq_m128(r, e);
45805 }
45806
45807 #[simd_test(enable = "avx512f")]
45808 unsafe fn test_mm512_rcp14_ps() {
45809 let a = _mm512_set1_ps(3.);
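        // rcp14 is an approximate reciprocal with a relative error of at most 2^-14,
        // hence 0.33333206 rather than an exact 1/3.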
45810 let r = _mm512_rcp14_ps(a);
45811 let e = _mm512_set1_ps(0.33333206);
45812 assert_eq_m512(r, e);
45813 }
45814
45815 #[simd_test(enable = "avx512f")]
45816 unsafe fn test_mm512_mask_rcp14_ps() {
45817 let a = _mm512_set1_ps(3.);
45818 let r = _mm512_mask_rcp14_ps(a, 0, a);
45819 assert_eq_m512(r, a);
45820 let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a);
45821 let e = _mm512_setr_ps(
45822 3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45823 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45824 );
45825 assert_eq_m512(r, e);
45826 }
45827
45828 #[simd_test(enable = "avx512f")]
45829 unsafe fn test_mm512_maskz_rcp14_ps() {
45830 let a = _mm512_set1_ps(3.);
45831 let r = _mm512_maskz_rcp14_ps(0, a);
45832 assert_eq_m512(r, _mm512_setzero_ps());
45833 let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a);
45834 let e = _mm512_setr_ps(
45835 0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45836 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45837 );
45838 assert_eq_m512(r, e);
45839 }
45840
45841 #[simd_test(enable = "avx512f,avx512vl")]
45842 unsafe fn test_mm256_rcp14_ps() {
45843 let a = _mm256_set1_ps(3.);
45844 let r = _mm256_rcp14_ps(a);
45845 let e = _mm256_set1_ps(0.33333206);
45846 assert_eq_m256(r, e);
45847 }
45848
45849 #[simd_test(enable = "avx512f,avx512vl")]
45850 unsafe fn test_mm256_mask_rcp14_ps() {
45851 let a = _mm256_set1_ps(3.);
45852 let r = _mm256_mask_rcp14_ps(a, 0, a);
45853 assert_eq_m256(r, a);
45854 let r = _mm256_mask_rcp14_ps(a, 0b11111111, a);
45855 let e = _mm256_set1_ps(0.33333206);
45856 assert_eq_m256(r, e);
45857 }
45858
45859 #[simd_test(enable = "avx512f,avx512vl")]
45860 unsafe fn test_mm256_maskz_rcp14_ps() {
45861 let a = _mm256_set1_ps(3.);
45862 let r = _mm256_maskz_rcp14_ps(0, a);
45863 assert_eq_m256(r, _mm256_setzero_ps());
45864 let r = _mm256_maskz_rcp14_ps(0b11111111, a);
45865 let e = _mm256_set1_ps(0.33333206);
45866 assert_eq_m256(r, e);
45867 }
45868
45869 #[simd_test(enable = "avx512f,avx512vl")]
45870 unsafe fn test_mm_rcp14_ps() {
45871 let a = _mm_set1_ps(3.);
45872 let r = _mm_rcp14_ps(a);
45873 let e = _mm_set1_ps(0.33333206);
45874 assert_eq_m128(r, e);
45875 }
45876
45877 #[simd_test(enable = "avx512f,avx512vl")]
45878 unsafe fn test_mm_mask_rcp14_ps() {
45879 let a = _mm_set1_ps(3.);
45880 let r = _mm_mask_rcp14_ps(a, 0, a);
45881 assert_eq_m128(r, a);
45882 let r = _mm_mask_rcp14_ps(a, 0b00001111, a);
45883 let e = _mm_set1_ps(0.33333206);
45884 assert_eq_m128(r, e);
45885 }
45886
45887 #[simd_test(enable = "avx512f,avx512vl")]
45888 unsafe fn test_mm_maskz_rcp14_ps() {
45889 let a = _mm_set1_ps(3.);
45890 let r = _mm_maskz_rcp14_ps(0, a);
45891 assert_eq_m128(r, _mm_setzero_ps());
45892 let r = _mm_maskz_rcp14_ps(0b00001111, a);
45893 let e = _mm_set1_ps(0.33333206);
45894 assert_eq_m128(r, e);
45895 }
45896
45897 #[simd_test(enable = "avx512f")]
45898 unsafe fn test_mm512_rsqrt14_ps() {
45899 let a = _mm512_set1_ps(3.);
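        // rsqrt14 is an approximate reciprocal square root with a relative error of at most
        // 2^-14, hence 0.5773392 rather than an exact 1/sqrt(3).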
45900 let r = _mm512_rsqrt14_ps(a);
45901 let e = _mm512_set1_ps(0.5773392);
45902 assert_eq_m512(r, e);
45903 }
45904
45905 #[simd_test(enable = "avx512f")]
45906 unsafe fn test_mm512_mask_rsqrt14_ps() {
45907 let a = _mm512_set1_ps(3.);
45908 let r = _mm512_mask_rsqrt14_ps(a, 0, a);
45909 assert_eq_m512(r, a);
45910 let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a);
45911 let e = _mm512_setr_ps(
45912 3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
45913 0.5773392, 0.5773392, 0.5773392,
45914 );
45915 assert_eq_m512(r, e);
45916 }
45917
45918 #[simd_test(enable = "avx512f")]
45919 unsafe fn test_mm512_maskz_rsqrt14_ps() {
45920 let a = _mm512_set1_ps(3.);
45921 let r = _mm512_maskz_rsqrt14_ps(0, a);
45922 assert_eq_m512(r, _mm512_setzero_ps());
45923 let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a);
45924 let e = _mm512_setr_ps(
45925 0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
45926 0.5773392, 0.5773392, 0.5773392,
45927 );
45928 assert_eq_m512(r, e);
45929 }
45930
45931 #[simd_test(enable = "avx512f,avx512vl")]
45932 unsafe fn test_mm256_rsqrt14_ps() {
45933 let a = _mm256_set1_ps(3.);
45934 let r = _mm256_rsqrt14_ps(a);
45935 let e = _mm256_set1_ps(0.5773392);
45936 assert_eq_m256(r, e);
45937 }
45938
45939 #[simd_test(enable = "avx512f,avx512vl")]
45940 unsafe fn test_mm256_mask_rsqrt14_ps() {
45941 let a = _mm256_set1_ps(3.);
45942 let r = _mm256_mask_rsqrt14_ps(a, 0, a);
45943 assert_eq_m256(r, a);
45944 let r = _mm256_mask_rsqrt14_ps(a, 0b11111111, a);
45945 let e = _mm256_set1_ps(0.5773392);
45946 assert_eq_m256(r, e);
45947 }
45948
45949 #[simd_test(enable = "avx512f,avx512vl")]
45950 unsafe fn test_mm256_maskz_rsqrt14_ps() {
45951 let a = _mm256_set1_ps(3.);
45952 let r = _mm256_maskz_rsqrt14_ps(0, a);
45953 assert_eq_m256(r, _mm256_setzero_ps());
45954 let r = _mm256_maskz_rsqrt14_ps(0b11111111, a);
45955 let e = _mm256_set1_ps(0.5773392);
45956 assert_eq_m256(r, e);
45957 }
45958
45959 #[simd_test(enable = "avx512f,avx512vl")]
45960 unsafe fn test_mm_rsqrt14_ps() {
45961 let a = _mm_set1_ps(3.);
45962 let r = _mm_rsqrt14_ps(a);
45963 let e = _mm_set1_ps(0.5773392);
45964 assert_eq_m128(r, e);
45965 }
45966
45967 #[simd_test(enable = "avx512f,avx512vl")]
45968 unsafe fn test_mm_mask_rsqrt14_ps() {
45969 let a = _mm_set1_ps(3.);
45970 let r = _mm_mask_rsqrt14_ps(a, 0, a);
45971 assert_eq_m128(r, a);
45972 let r = _mm_mask_rsqrt14_ps(a, 0b00001111, a);
45973 let e = _mm_set1_ps(0.5773392);
45974 assert_eq_m128(r, e);
45975 }
45976
45977 #[simd_test(enable = "avx512f,avx512vl")]
45978 unsafe fn test_mm_maskz_rsqrt14_ps() {
45979 let a = _mm_set1_ps(3.);
45980 let r = _mm_maskz_rsqrt14_ps(0, a);
45981 assert_eq_m128(r, _mm_setzero_ps());
45982 let r = _mm_maskz_rsqrt14_ps(0b00001111, a);
45983 let e = _mm_set1_ps(0.5773392);
45984 assert_eq_m128(r, e);
45985 }
45986
45987 #[simd_test(enable = "avx512f")]
45988 unsafe fn test_mm512_getexp_ps() {
45989 let a = _mm512_set1_ps(3.);
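        // getexp returns floor(log2(|a|)) as a float, so the exponent of 3.0 is 1.0.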
45990 let r = _mm512_getexp_ps(a);
45991 let e = _mm512_set1_ps(1.);
45992 assert_eq_m512(r, e);
45993 }
45994
45995 #[simd_test(enable = "avx512f")]
45996 unsafe fn test_mm512_mask_getexp_ps() {
45997 let a = _mm512_set1_ps(3.);
45998 let r = _mm512_mask_getexp_ps(a, 0, a);
45999 assert_eq_m512(r, a);
46000 let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a);
46001 let e = _mm512_setr_ps(
46002 3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
46003 );
46004 assert_eq_m512(r, e);
46005 }
46006
46007 #[simd_test(enable = "avx512f")]
46008 unsafe fn test_mm512_maskz_getexp_ps() {
46009 let a = _mm512_set1_ps(3.);
46010 let r = _mm512_maskz_getexp_ps(0, a);
46011 assert_eq_m512(r, _mm512_setzero_ps());
46012 let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a);
46013 let e = _mm512_setr_ps(
46014 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
46015 );
46016 assert_eq_m512(r, e);
46017 }
46018
46019 #[simd_test(enable = "avx512f,avx512vl")]
46020 unsafe fn test_mm256_getexp_ps() {
46021 let a = _mm256_set1_ps(3.);
46022 let r = _mm256_getexp_ps(a);
46023 let e = _mm256_set1_ps(1.);
46024 assert_eq_m256(r, e);
46025 }
46026
46027 #[simd_test(enable = "avx512f,avx512vl")]
46028 unsafe fn test_mm256_mask_getexp_ps() {
46029 let a = _mm256_set1_ps(3.);
46030 let r = _mm256_mask_getexp_ps(a, 0, a);
46031 assert_eq_m256(r, a);
46032 let r = _mm256_mask_getexp_ps(a, 0b11111111, a);
46033 let e = _mm256_set1_ps(1.);
46034 assert_eq_m256(r, e);
46035 }
46036
46037 #[simd_test(enable = "avx512f,avx512vl")]
46038 unsafe fn test_mm256_maskz_getexp_ps() {
46039 let a = _mm256_set1_ps(3.);
46040 let r = _mm256_maskz_getexp_ps(0, a);
46041 assert_eq_m256(r, _mm256_setzero_ps());
46042 let r = _mm256_maskz_getexp_ps(0b11111111, a);
46043 let e = _mm256_set1_ps(1.);
46044 assert_eq_m256(r, e);
46045 }
46046
46047 #[simd_test(enable = "avx512f,avx512vl")]
46048 unsafe fn test_mm_getexp_ps() {
46049 let a = _mm_set1_ps(3.);
46050 let r = _mm_getexp_ps(a);
46051 let e = _mm_set1_ps(1.);
46052 assert_eq_m128(r, e);
46053 }
46054
46055 #[simd_test(enable = "avx512f,avx512vl")]
46056 unsafe fn test_mm_mask_getexp_ps() {
46057 let a = _mm_set1_ps(3.);
46058 let r = _mm_mask_getexp_ps(a, 0, a);
46059 assert_eq_m128(r, a);
46060 let r = _mm_mask_getexp_ps(a, 0b00001111, a);
46061 let e = _mm_set1_ps(1.);
46062 assert_eq_m128(r, e);
46063 }
46064
46065 #[simd_test(enable = "avx512f,avx512vl")]
46066 unsafe fn test_mm_maskz_getexp_ps() {
46067 let a = _mm_set1_ps(3.);
46068 let r = _mm_maskz_getexp_ps(0, a);
46069 assert_eq_m128(r, _mm_setzero_ps());
46070 let r = _mm_maskz_getexp_ps(0b00001111, a);
46071 let e = _mm_set1_ps(1.);
46072 assert_eq_m128(r, e);
46073 }
46074
46075 #[simd_test(enable = "avx512f")]
46076 unsafe fn test_mm512_roundscale_ps() {
46077 let a = _mm512_set1_ps(1.1);
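        // imm8 = 0 selects no scaling (M = 0) and round-to-nearest, so 1.1 rounds to 1.0.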
46078 let r = _mm512_roundscale_ps::<0b00_00_00_00>(a);
46079 let e = _mm512_set1_ps(1.0);
46080 assert_eq_m512(r, e);
46081 }
46082
46083 #[simd_test(enable = "avx512f")]
46084 unsafe fn test_mm512_mask_roundscale_ps() {
46085 let a = _mm512_set1_ps(1.1);
46086 let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46087 let e = _mm512_set1_ps(1.1);
46088 assert_eq_m512(r, e);
46089 let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111_11111111, a);
46090 let e = _mm512_set1_ps(1.0);
46091 assert_eq_m512(r, e);
46092 }
46093
46094 #[simd_test(enable = "avx512f")]
46095 unsafe fn test_mm512_maskz_roundscale_ps() {
46096 let a = _mm512_set1_ps(1.1);
46097 let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46098 assert_eq_m512(r, _mm512_setzero_ps());
46099 let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111_11111111, a);
46100 let e = _mm512_set1_ps(1.0);
46101 assert_eq_m512(r, e);
46102 }
46103
46104 #[simd_test(enable = "avx512f,avx512vl")]
46105 unsafe fn test_mm256_roundscale_ps() {
46106 let a = _mm256_set1_ps(1.1);
46107 let r = _mm256_roundscale_ps::<0b00_00_00_00>(a);
46108 let e = _mm256_set1_ps(1.0);
46109 assert_eq_m256(r, e);
46110 }
46111
46112 #[simd_test(enable = "avx512f,avx512vl")]
46113 unsafe fn test_mm256_mask_roundscale_ps() {
46114 let a = _mm256_set1_ps(1.1);
46115 let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46116 let e = _mm256_set1_ps(1.1);
46117 assert_eq_m256(r, e);
46118 let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111, a);
46119 let e = _mm256_set1_ps(1.0);
46120 assert_eq_m256(r, e);
46121 }
46122
46123 #[simd_test(enable = "avx512f,avx512vl")]
46124 unsafe fn test_mm256_maskz_roundscale_ps() {
46125 let a = _mm256_set1_ps(1.1);
46126 let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46127 assert_eq_m256(r, _mm256_setzero_ps());
46128 let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111, a);
46129 let e = _mm256_set1_ps(1.0);
46130 assert_eq_m256(r, e);
46131 }
46132
46133 #[simd_test(enable = "avx512f,avx512vl")]
46134 unsafe fn test_mm_roundscale_ps() {
46135 let a = _mm_set1_ps(1.1);
46136 let r = _mm_roundscale_ps::<0b00_00_00_00>(a);
46137 let e = _mm_set1_ps(1.0);
46138 assert_eq_m128(r, e);
46139 }
46140
46141 #[simd_test(enable = "avx512f,avx512vl")]
46142 unsafe fn test_mm_mask_roundscale_ps() {
46143 let a = _mm_set1_ps(1.1);
46144 let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46145 let e = _mm_set1_ps(1.1);
46146 assert_eq_m128(r, e);
46147 let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0b00001111, a);
46148 let e = _mm_set1_ps(1.0);
46149 assert_eq_m128(r, e);
46150 }
46151
46152 #[simd_test(enable = "avx512f,avx512vl")]
46153 unsafe fn test_mm_maskz_roundscale_ps() {
46154 let a = _mm_set1_ps(1.1);
46155 let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46156 assert_eq_m128(r, _mm_setzero_ps());
46157 let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0b00001111, a);
46158 let e = _mm_set1_ps(1.0);
46159 assert_eq_m128(r, e);
46160 }
46161
46162 #[simd_test(enable = "avx512f")]
46163 unsafe fn test_mm512_scalef_ps() {
46164 let a = _mm512_set1_ps(1.);
46165 let b = _mm512_set1_ps(3.);
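        // scalef computes a * 2^floor(b) per lane: 1.0 * 2^3 = 8.0.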
46166 let r = _mm512_scalef_ps(a, b);
46167 let e = _mm512_set1_ps(8.);
46168 assert_eq_m512(r, e);
46169 }
46170
46171 #[simd_test(enable = "avx512f")]
46172 unsafe fn test_mm512_mask_scalef_ps() {
46173 let a = _mm512_set1_ps(1.);
46174 let b = _mm512_set1_ps(3.);
46175 let r = _mm512_mask_scalef_ps(a, 0, a, b);
46176 assert_eq_m512(r, a);
46177 let r = _mm512_mask_scalef_ps(a, 0b11111111_00000000, a, b);
46178 let e = _mm512_set_ps(
46179 8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
46180 );
46181 assert_eq_m512(r, e);
46182 }
46183
46184 #[simd_test(enable = "avx512f")]
46185 unsafe fn test_mm512_maskz_scalef_ps() {
46186 let a = _mm512_set1_ps(1.);
46187 let b = _mm512_set1_ps(3.);
46188 let r = _mm512_maskz_scalef_ps(0, a, b);
46189 assert_eq_m512(r, _mm512_setzero_ps());
46190 let r = _mm512_maskz_scalef_ps(0b11111111_00000000, a, b);
46191 let e = _mm512_set_ps(
46192 8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
46193 );
46194 assert_eq_m512(r, e);
46195 }
46196
46197 #[simd_test(enable = "avx512f,avx512vl")]
46198 unsafe fn test_mm256_scalef_ps() {
46199 let a = _mm256_set1_ps(1.);
46200 let b = _mm256_set1_ps(3.);
46201 let r = _mm256_scalef_ps(a, b);
46202 let e = _mm256_set1_ps(8.);
46203 assert_eq_m256(r, e);
46204 }
46205
46206 #[simd_test(enable = "avx512f,avx512vl")]
46207 unsafe fn test_mm256_mask_scalef_ps() {
46208 let a = _mm256_set1_ps(1.);
46209 let b = _mm256_set1_ps(3.);
46210 let r = _mm256_mask_scalef_ps(a, 0, a, b);
46211 assert_eq_m256(r, a);
46212 let r = _mm256_mask_scalef_ps(a, 0b11111111, a, b);
46213 let e = _mm256_set1_ps(8.);
46214 assert_eq_m256(r, e);
46215 }
46216
46217 #[simd_test(enable = "avx512f,avx512vl")]
46218 unsafe fn test_mm256_maskz_scalef_ps() {
46219 let a = _mm256_set1_ps(1.);
46220 let b = _mm256_set1_ps(3.);
46221 let r = _mm256_maskz_scalef_ps(0, a, b);
46222 assert_eq_m256(r, _mm256_setzero_ps());
46223 let r = _mm256_maskz_scalef_ps(0b11111111, a, b);
46224 let e = _mm256_set1_ps(8.);
46225 assert_eq_m256(r, e);
46226 }
46227
46228 #[simd_test(enable = "avx512f,avx512vl")]
46229 unsafe fn test_mm_scalef_ps() {
46230 let a = _mm_set1_ps(1.);
46231 let b = _mm_set1_ps(3.);
46232 let r = _mm_scalef_ps(a, b);
46233 let e = _mm_set1_ps(8.);
46234 assert_eq_m128(r, e);
46235 }
46236
46237 #[simd_test(enable = "avx512f,avx512vl")]
46238 unsafe fn test_mm_mask_scalef_ps() {
46239 let a = _mm_set1_ps(1.);
46240 let b = _mm_set1_ps(3.);
46241 let r = _mm_mask_scalef_ps(a, 0, a, b);
46242 assert_eq_m128(r, a);
46243 let r = _mm_mask_scalef_ps(a, 0b00001111, a, b);
46244 let e = _mm_set1_ps(8.);
46245 assert_eq_m128(r, e);
46246 }
46247
46248 #[simd_test(enable = "avx512f,avx512vl")]
46249 unsafe fn test_mm_maskz_scalef_ps() {
46250 let a = _mm_set1_ps(1.);
46251 let b = _mm_set1_ps(3.);
46252 let r = _mm_maskz_scalef_ps(0, a, b);
46253 assert_eq_m128(r, _mm_setzero_ps());
46254 let r = _mm_maskz_scalef_ps(0b00001111, a, b);
46255 let e = _mm_set1_ps(8.);
46256 assert_eq_m128(r, e);
46257 }
46258
46259 #[simd_test(enable = "avx512f")]
46260 unsafe fn test_mm512_fixupimm_ps() {
46261 let a = _mm512_set1_ps(f32::NAN);
46262 let b = _mm512_set1_ps(f32::MAX);
46263 let c = _mm512_set1_epi32(i32::MAX);
46265 let r = _mm512_fixupimm_ps::<5>(a, b, c);
46266 let e = _mm512_set1_ps(0.0);
46267 assert_eq_m512(r, e);
46268 }
46269
46270 #[simd_test(enable = "avx512f")]
46271 unsafe fn test_mm512_mask_fixupimm_ps() {
46272 #[rustfmt::skip]
46273 let a = _mm512_set_ps(
46274 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46275 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46276 1., 1., 1., 1.,
46277 1., 1., 1., 1.,
46278 );
46279 let b = _mm512_set1_ps(f32::MAX);
46280 let c = _mm512_set1_epi32(i32::MAX);
46281 let r = _mm512_mask_fixupimm_ps::<5>(a, 0b11111111_00000000, b, c);
46282 let e = _mm512_set_ps(
46283 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
46284 );
46285 assert_eq_m512(r, e);
46286 }
46287
46288 #[simd_test(enable = "avx512f")]
46289 unsafe fn test_mm512_maskz_fixupimm_ps() {
46290 #[rustfmt::skip]
46291 let a = _mm512_set_ps(
46292 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46293 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46294 1., 1., 1., 1.,
46295 1., 1., 1., 1.,
46296 );
46297 let b = _mm512_set1_ps(f32::MAX);
46298 let c = _mm512_set1_epi32(i32::MAX);
46299 let r = _mm512_maskz_fixupimm_ps::<5>(0b11111111_00000000, a, b, c);
46300 let e = _mm512_set_ps(
46301 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
46302 );
46303 assert_eq_m512(r, e);
46304 }
46305
46306 #[simd_test(enable = "avx512f,avx512vl")]
46307 unsafe fn test_mm256_fixupimm_ps() {
46308 let a = _mm256_set1_ps(f32::NAN);
46309 let b = _mm256_set1_ps(f32::MAX);
46310 let c = _mm256_set1_epi32(i32::MAX);
46311 let r = _mm256_fixupimm_ps::<5>(a, b, c);
46312 let e = _mm256_set1_ps(0.0);
46313 assert_eq_m256(r, e);
46314 }
46315
46316 #[simd_test(enable = "avx512f,avx512vl")]
46317 unsafe fn test_mm256_mask_fixupimm_ps() {
46318 let a = _mm256_set1_ps(f32::NAN);
46319 let b = _mm256_set1_ps(f32::MAX);
46320 let c = _mm256_set1_epi32(i32::MAX);
46321 let r = _mm256_mask_fixupimm_ps::<5>(a, 0b11111111, b, c);
46322 let e = _mm256_set1_ps(0.0);
46323 assert_eq_m256(r, e);
46324 }
46325
46326 #[simd_test(enable = "avx512f,avx512vl")]
46327 unsafe fn test_mm256_maskz_fixupimm_ps() {
46328 let a = _mm256_set1_ps(f32::NAN);
46329 let b = _mm256_set1_ps(f32::MAX);
46330 let c = _mm256_set1_epi32(i32::MAX);
46331 let r = _mm256_maskz_fixupimm_ps::<5>(0b11111111, a, b, c);
46332 let e = _mm256_set1_ps(0.0);
46333 assert_eq_m256(r, e);
46334 }
46335
46336 #[simd_test(enable = "avx512f,avx512vl")]
46337 unsafe fn test_mm_fixupimm_ps() {
46338 let a = _mm_set1_ps(f32::NAN);
46339 let b = _mm_set1_ps(f32::MAX);
46340 let c = _mm_set1_epi32(i32::MAX);
46341 let r = _mm_fixupimm_ps::<5>(a, b, c);
46342 let e = _mm_set1_ps(0.0);
46343 assert_eq_m128(r, e);
46344 }
46345
46346 #[simd_test(enable = "avx512f,avx512vl")]
46347 unsafe fn test_mm_mask_fixupimm_ps() {
46348 let a = _mm_set1_ps(f32::NAN);
46349 let b = _mm_set1_ps(f32::MAX);
46350 let c = _mm_set1_epi32(i32::MAX);
46351 let r = _mm_mask_fixupimm_ps::<5>(a, 0b00001111, b, c);
46352 let e = _mm_set1_ps(0.0);
46353 assert_eq_m128(r, e);
46354 }
46355
46356 #[simd_test(enable = "avx512f,avx512vl")]
46357 unsafe fn test_mm_maskz_fixupimm_ps() {
46358 let a = _mm_set1_ps(f32::NAN);
46359 let b = _mm_set1_ps(f32::MAX);
46360 let c = _mm_set1_epi32(i32::MAX);
46361 let r = _mm_maskz_fixupimm_ps::<5>(0b00001111, a, b, c);
46362 let e = _mm_set1_ps(0.0);
46363 assert_eq_m128(r, e);
46364 }
46365
46366 #[simd_test(enable = "avx512f")]
46367 unsafe fn test_mm512_ternarylogic_epi32() {
46368 let a = _mm512_set1_epi32(1 << 2);
46369 let b = _mm512_set1_epi32(1 << 1);
46370 let c = _mm512_set1_epi32(1 << 0);
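        // Bit i of the immediate is the truth-table output for the input bits
        // (a, b, c) = (i[2], i[1], i[0]); imm8 = 8 encodes !a & b & c, which none of these
        // disjoint single-bit inputs satisfy, so every lane is 0.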
46371 let r = _mm512_ternarylogic_epi32::<8>(a, b, c);
46372 let e = _mm512_set1_epi32(0);
46373 assert_eq_m512i(r, e);
46374 }
46375
46376 #[simd_test(enable = "avx512f")]
46377 unsafe fn test_mm512_mask_ternarylogic_epi32() {
46378 let src = _mm512_set1_epi32(1 << 2);
46379 let a = _mm512_set1_epi32(1 << 1);
46380 let b = _mm512_set1_epi32(1 << 0);
46381 let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46382 assert_eq_m512i(r, src);
46383 let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0b11111111_11111111, a, b);
46384 let e = _mm512_set1_epi32(0);
46385 assert_eq_m512i(r, e);
46386 }
46387
46388 #[simd_test(enable = "avx512f")]
46389 unsafe fn test_mm512_maskz_ternarylogic_epi32() {
46390 let a = _mm512_set1_epi32(1 << 2);
46391 let b = _mm512_set1_epi32(1 << 1);
46392 let c = _mm512_set1_epi32(1 << 0);
46393 let r = _mm512_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46394 assert_eq_m512i(r, _mm512_setzero_si512());
46395 let r = _mm512_maskz_ternarylogic_epi32::<8>(0b11111111_11111111, a, b, c);
46396 let e = _mm512_set1_epi32(0);
46397 assert_eq_m512i(r, e);
46398 }
46399
46400 #[simd_test(enable = "avx512f,avx512vl")]
46401 unsafe fn test_mm256_ternarylogic_epi32() {
46402 let a = _mm256_set1_epi32(1 << 2);
46403 let b = _mm256_set1_epi32(1 << 1);
46404 let c = _mm256_set1_epi32(1 << 0);
46405 let r = _mm256_ternarylogic_epi32::<8>(a, b, c);
46406 let e = _mm256_set1_epi32(0);
46407 assert_eq_m256i(r, e);
46408 }
46409
46410 #[simd_test(enable = "avx512f,avx512vl")]
46411 unsafe fn test_mm256_mask_ternarylogic_epi32() {
46412 let src = _mm256_set1_epi32(1 << 2);
46413 let a = _mm256_set1_epi32(1 << 1);
46414 let b = _mm256_set1_epi32(1 << 0);
46415 let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46416 assert_eq_m256i(r, src);
46417 let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0b11111111, a, b);
46418 let e = _mm256_set1_epi32(0);
46419 assert_eq_m256i(r, e);
46420 }
46421
46422 #[simd_test(enable = "avx512f,avx512vl")]
46423 unsafe fn test_mm256_maskz_ternarylogic_epi32() {
46424 let a = _mm256_set1_epi32(1 << 2);
46425 let b = _mm256_set1_epi32(1 << 1);
46426 let c = _mm256_set1_epi32(1 << 0);
46427 let r = _mm256_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46428 assert_eq_m256i(r, _mm256_setzero_si256());
46429 let r = _mm256_maskz_ternarylogic_epi32::<8>(0b11111111, a, b, c);
46430 let e = _mm256_set1_epi32(0);
46431 assert_eq_m256i(r, e);
46432 }
46433
46434 #[simd_test(enable = "avx512f,avx512vl")]
46435 unsafe fn test_mm_ternarylogic_epi32() {
46436 let a = _mm_set1_epi32(1 << 2);
46437 let b = _mm_set1_epi32(1 << 1);
46438 let c = _mm_set1_epi32(1 << 0);
46439 let r = _mm_ternarylogic_epi32::<8>(a, b, c);
46440 let e = _mm_set1_epi32(0);
46441 assert_eq_m128i(r, e);
46442 }
46443
46444 #[simd_test(enable = "avx512f,avx512vl")]
46445 unsafe fn test_mm_mask_ternarylogic_epi32() {
46446 let src = _mm_set1_epi32(1 << 2);
46447 let a = _mm_set1_epi32(1 << 1);
46448 let b = _mm_set1_epi32(1 << 0);
46449 let r = _mm_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46450 assert_eq_m128i(r, src);
46451 let r = _mm_mask_ternarylogic_epi32::<8>(src, 0b00001111, a, b);
46452 let e = _mm_set1_epi32(0);
46453 assert_eq_m128i(r, e);
46454 }
46455
46456 #[simd_test(enable = "avx512f,avx512vl")]
46457 unsafe fn test_mm_maskz_ternarylogic_epi32() {
46458 let a = _mm_set1_epi32(1 << 2);
46459 let b = _mm_set1_epi32(1 << 1);
46460 let c = _mm_set1_epi32(1 << 0);
46461 let r = _mm_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46462 assert_eq_m128i(r, _mm_setzero_si128());
46463 let r = _mm_maskz_ternarylogic_epi32::<8>(0b00001111, a, b, c);
46464 let e = _mm_set1_epi32(0);
46465 assert_eq_m128i(r, e);
46466 }
46467
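    // Note: getmant extracts the mantissa of |x| = m * 2^e, normalized to the requested
    // interval. 10.0 == 1.25 * 2^3, so both the [0.75, 1.5) and [1, 2) normalizations used in
    // these tests yield 1.25.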
46468 #[simd_test(enable = "avx512f")]
46469 unsafe fn test_mm512_getmant_ps() {
46470 let a = _mm512_set1_ps(10.);
46471 let r = _mm512_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46472 let e = _mm512_set1_ps(1.25);
46473 assert_eq_m512(r, e);
46474 }
46475
46476 #[simd_test(enable = "avx512f")]
46477 unsafe fn test_mm512_mask_getmant_ps() {
46478 let a = _mm512_set1_ps(10.);
46479 let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46480 assert_eq_m512(r, a);
46481 let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(
46482 a,
46483 0b11111111_00000000,
46484 a,
46485 );
46486 let e = _mm512_setr_ps(
46487 10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
46488 );
46489 assert_eq_m512(r, e);
46490 }
46491
46492 #[simd_test(enable = "avx512f")]
46493 unsafe fn test_mm512_maskz_getmant_ps() {
46494 let a = _mm512_set1_ps(10.);
46495 let r = _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46496 assert_eq_m512(r, _mm512_setzero_ps());
46497 let r =
46498 _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111_00000000, a);
46499 let e = _mm512_setr_ps(
46500 0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
46501 );
46502 assert_eq_m512(r, e);
46503 }
46504
46505 #[simd_test(enable = "avx512f,avx512vl")]
46506 unsafe fn test_mm256_getmant_ps() {
46507 let a = _mm256_set1_ps(10.);
46508 let r = _mm256_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46509 let e = _mm256_set1_ps(1.25);
46510 assert_eq_m256(r, e);
46511 }
46512
46513 #[simd_test(enable = "avx512f,avx512vl")]
46514 unsafe fn test_mm256_mask_getmant_ps() {
46515 let a = _mm256_set1_ps(10.);
46516 let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46517 assert_eq_m256(r, a);
46518 let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a);
46519 let e = _mm256_set1_ps(1.25);
46520 assert_eq_m256(r, e);
46521 }
46522
46523 #[simd_test(enable = "avx512f,avx512vl")]
46524 unsafe fn test_mm256_maskz_getmant_ps() {
46525 let a = _mm256_set1_ps(10.);
46526 let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46527 assert_eq_m256(r, _mm256_setzero_ps());
46528 let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a);
46529 let e = _mm256_set1_ps(1.25);
46530 assert_eq_m256(r, e);
46531 }
46532
46533 #[simd_test(enable = "avx512f,avx512vl")]
46534 unsafe fn test_mm_getmant_ps() {
46535 let a = _mm_set1_ps(10.);
46536 let r = _mm_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46537 let e = _mm_set1_ps(1.25);
46538 assert_eq_m128(r, e);
46539 }
46540
46541 #[simd_test(enable = "avx512f,avx512vl")]
46542 unsafe fn test_mm_mask_getmant_ps() {
46543 let a = _mm_set1_ps(10.);
46544 let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46545 assert_eq_m128(r, a);
46546 let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00001111, a);
46547 let e = _mm_set1_ps(1.25);
46548 assert_eq_m128(r, e);
46549 }
46550
46551 #[simd_test(enable = "avx512f,avx512vl")]
46552 unsafe fn test_mm_maskz_getmant_ps() {
46553 let a = _mm_set1_ps(10.);
46554 let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46555 assert_eq_m128(r, _mm_setzero_ps());
46556 let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00001111, a);
46557 let e = _mm_set1_ps(1.25);
46558 assert_eq_m128(r, e);
46559 }
46560
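    // Note: the *_round_ps variants take an embedded rounding mode. In these tests it only
    // matters for the one inexact lane (0.00000007 + -1.0): round-to-nearest gives -0.99999994,
    // while round-toward-zero keeps the smaller-magnitude neighbour, written as -0.9999999.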
46561 #[simd_test(enable = "avx512f")]
46562 unsafe fn test_mm512_add_round_ps() {
46563 let a = _mm512_setr_ps(
46564 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46565 );
46566 let b = _mm512_set1_ps(-1.);
46567 let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46568 #[rustfmt::skip]
46569 let e = _mm512_setr_ps(
46570 -1., 0.5, 1., 2.5,
46571 3., 4.5, 5., 6.5,
46572 7., 8.5, 9., 10.5,
46573 11., 12.5, 13., -0.99999994,
46574 );
46575 assert_eq_m512(r, e);
46576 let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46577 let e = _mm512_setr_ps(
46578 -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
46579 );
46580 assert_eq_m512(r, e);
46581 }
46582
46583 #[simd_test(enable = "avx512f")]
46584 unsafe fn test_mm512_mask_add_round_ps() {
46585 let a = _mm512_setr_ps(
46586 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46587 );
46588 let b = _mm512_set1_ps(-1.);
46589 let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
46590 assert_eq_m512(r, a);
46591 let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46592 a,
46593 0b11111111_00000000,
46594 a,
46595 b,
46596 );
46597 #[rustfmt::skip]
46598 let e = _mm512_setr_ps(
46599 0., 1.5, 2., 3.5,
46600 4., 5.5, 6., 7.5,
46601 7., 8.5, 9., 10.5,
46602 11., 12.5, 13., -0.99999994,
46603 );
46604 assert_eq_m512(r, e);
46605 }
46606
46607 #[simd_test(enable = "avx512f")]
46608 unsafe fn test_mm512_maskz_add_round_ps() {
46609 let a = _mm512_setr_ps(
46610 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46611 );
46612 let b = _mm512_set1_ps(-1.);
46613 let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
46614 assert_eq_m512(r, _mm512_setzero_ps());
46615 let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46616 0b11111111_00000000,
46617 a,
46618 b,
46619 );
46620 #[rustfmt::skip]
46621 let e = _mm512_setr_ps(
46622 0., 0., 0., 0.,
46623 0., 0., 0., 0.,
46624 7., 8.5, 9., 10.5,
46625 11., 12.5, 13., -0.99999994,
46626 );
46627 assert_eq_m512(r, e);
46628 }
46629
46630 #[simd_test(enable = "avx512f")]
46631 unsafe fn test_mm512_sub_round_ps() {
46632 let a = _mm512_setr_ps(
46633 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46634 );
46635 let b = _mm512_set1_ps(1.);
46636 let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46637 #[rustfmt::skip]
46638 let e = _mm512_setr_ps(
46639 -1., 0.5, 1., 2.5,
46640 3., 4.5, 5., 6.5,
46641 7., 8.5, 9., 10.5,
46642 11., 12.5, 13., -0.99999994,
46643 );
46644 assert_eq_m512(r, e);
46645 let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46646 let e = _mm512_setr_ps(
46647 -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
46648 );
46649 assert_eq_m512(r, e);
46650 }
46651
46652 #[simd_test(enable = "avx512f")]
46653 unsafe fn test_mm512_mask_sub_round_ps() {
46654 let a = _mm512_setr_ps(
46655 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46656 );
46657 let b = _mm512_set1_ps(1.);
46658 let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46659 a, 0, a, b,
46660 );
46661 assert_eq_m512(r, a);
46662 let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46663 a,
46664 0b11111111_00000000,
46665 a,
46666 b,
46667 );
46668 #[rustfmt::skip]
46669 let e = _mm512_setr_ps(
46670 0., 1.5, 2., 3.5,
46671 4., 5.5, 6., 7.5,
46672 7., 8.5, 9., 10.5,
46673 11., 12.5, 13., -0.99999994,
46674 );
46675 assert_eq_m512(r, e);
46676 }
46677
46678 #[simd_test(enable = "avx512f")]
46679 unsafe fn test_mm512_maskz_sub_round_ps() {
46680 let a = _mm512_setr_ps(
46681 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46682 );
46683 let b = _mm512_set1_ps(1.);
46684 let r =
46685 _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46686 assert_eq_m512(r, _mm512_setzero_ps());
46687 let r = _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46688 0b11111111_00000000,
46689 a,
46690 b,
46691 );
46692 #[rustfmt::skip]
46693 let e = _mm512_setr_ps(
46694 0., 0., 0., 0.,
46695 0., 0., 0., 0.,
46696 7., 8.5, 9., 10.5,
46697 11., 12.5, 13., -0.99999994,
46698 );
46699 assert_eq_m512(r, e);
46700 }
46701
46702 #[simd_test(enable = "avx512f")]
46703 unsafe fn test_mm512_mul_round_ps() {
46704 #[rustfmt::skip]
46705 let a = _mm512_setr_ps(
46706 0., 1.5, 2., 3.5,
46707 4., 5.5, 6., 7.5,
46708 8., 9.5, 10., 11.5,
46709 12., 13.5, 14., 0.00000000000000000000007,
46710 );
46711 let b = _mm512_set1_ps(0.1);
46712 let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46713 #[rustfmt::skip]
46714 let e = _mm512_setr_ps(
46715 0., 0.15, 0.2, 0.35,
46716 0.4, 0.55, 0.6, 0.75,
46717 0.8, 0.95, 1.0, 1.15,
46718 1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46719 );
46720 assert_eq_m512(r, e);
46721 let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46722 #[rustfmt::skip]
46723 let e = _mm512_setr_ps(
46724 0., 0.14999999, 0.2, 0.35,
46725 0.4, 0.54999995, 0.59999996, 0.75,
46726 0.8, 0.95, 1.0, 1.15,
46727 1.1999999, 1.3499999, 1.4, 0.000000000000000000000007,
46728 );
46729 assert_eq_m512(r, e);
46730 }
46731
46732 #[simd_test(enable = "avx512f")]
46733 unsafe fn test_mm512_mask_mul_round_ps() {
46734 #[rustfmt::skip]
46735 let a = _mm512_setr_ps(
46736 0., 1.5, 2., 3.5,
46737 4., 5.5, 6., 7.5,
46738 8., 9.5, 10., 11.5,
46739 12., 13.5, 14., 0.00000000000000000000007,
46740 );
46741 let b = _mm512_set1_ps(0.1);
46742 let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46743 a, 0, a, b,
46744 );
46745 assert_eq_m512(r, a);
46746 let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46747 a,
46748 0b11111111_00000000,
46749 a,
46750 b,
46751 );
46752 #[rustfmt::skip]
46753 let e = _mm512_setr_ps(
46754 0., 1.5, 2., 3.5,
46755 4., 5.5, 6., 7.5,
46756 0.8, 0.95, 1.0, 1.15,
46757 1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46758 );
46759 assert_eq_m512(r, e);
46760 }
46761
46762 #[simd_test(enable = "avx512f")]
46763 unsafe fn test_mm512_maskz_mul_round_ps() {
46764 #[rustfmt::skip]
46765 let a = _mm512_setr_ps(
46766 0., 1.5, 2., 3.5,
46767 4., 5.5, 6., 7.5,
46768 8., 9.5, 10., 11.5,
46769 12., 13.5, 14., 0.00000000000000000000007,
46770 );
46771 let b = _mm512_set1_ps(0.1);
46772 let r =
46773 _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46774 assert_eq_m512(r, _mm512_setzero_ps());
46775 let r = _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46776 0b11111111_00000000,
46777 a,
46778 b,
46779 );
46780 #[rustfmt::skip]
46781 let e = _mm512_setr_ps(
46782 0., 0., 0., 0.,
46783 0., 0., 0., 0.,
46784 0.8, 0.95, 1.0, 1.15,
46785 1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46786 );
46787 assert_eq_m512(r, e);
46788 }
46789
46790 #[simd_test(enable = "avx512f")]
46791 unsafe fn test_mm512_div_round_ps() {
46792 let a = _mm512_set1_ps(1.);
46793 let b = _mm512_set1_ps(3.);
46794 let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46795 let e = _mm512_set1_ps(0.33333334);
46796 assert_eq_m512(r, e);
46797 let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46798 let e = _mm512_set1_ps(0.3333333);
46799 assert_eq_m512(r, e);
46800 }
46801
46802 #[simd_test(enable = "avx512f")]
46803 unsafe fn test_mm512_mask_div_round_ps() {
46804 let a = _mm512_set1_ps(1.);
46805 let b = _mm512_set1_ps(3.);
46806 let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46807 a, 0, a, b,
46808 );
46809 assert_eq_m512(r, a);
46810 let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46811 a,
46812 0b11111111_00000000,
46813 a,
46814 b,
46815 );
46816 let e = _mm512_setr_ps(
46817 1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46818 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46819 );
46820 assert_eq_m512(r, e);
46821 }
46822
46823 #[simd_test(enable = "avx512f")]
46824 unsafe fn test_mm512_maskz_div_round_ps() {
46825 let a = _mm512_set1_ps(1.);
46826 let b = _mm512_set1_ps(3.);
46827 let r =
46828 _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46829 assert_eq_m512(r, _mm512_setzero_ps());
46830 let r = _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46831 0b11111111_00000000,
46832 a,
46833 b,
46834 );
46835 let e = _mm512_setr_ps(
46836 0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46837 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46838 );
46839 assert_eq_m512(r, e);
46840 }
46841
46842 #[simd_test(enable = "avx512f")]
46843 unsafe fn test_mm512_sqrt_round_ps() {
46844 let a = _mm512_set1_ps(3.);
46845 let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
46846 let e = _mm512_set1_ps(1.7320508);
46847 assert_eq_m512(r, e);
46848 let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
46849 let e = _mm512_set1_ps(1.7320509);
46850 assert_eq_m512(r, e);
46851 }
46852
46853 #[simd_test(enable = "avx512f")]
46854 unsafe fn test_mm512_mask_sqrt_round_ps() {
46855 let a = _mm512_set1_ps(3.);
46856 let r =
46857 _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a);
46858 assert_eq_m512(r, a);
46859 let r = _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46860 a,
46861 0b11111111_00000000,
46862 a,
46863 );
46864 let e = _mm512_setr_ps(
46865 3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
46866 1.7320508, 1.7320508, 1.7320508,
46867 );
46868 assert_eq_m512(r, e);
46869 }
46870
46871 #[simd_test(enable = "avx512f")]
46872 unsafe fn test_mm512_maskz_sqrt_round_ps() {
46873 let a = _mm512_set1_ps(3.);
46874 let r =
46875 _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a);
46876 assert_eq_m512(r, _mm512_setzero_ps());
46877 let r = _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46878 0b11111111_00000000,
46879 a,
46880 );
46881 let e = _mm512_setr_ps(
46882 0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
46883 1.7320508, 1.7320508, 1.7320508,
46884 );
46885 assert_eq_m512(r, e);
46886 }
46887
46888 #[simd_test(enable = "avx512f")]
46889 unsafe fn test_mm512_fmadd_round_ps() {
46890 let a = _mm512_set1_ps(0.00000007);
46891 let b = _mm512_set1_ps(1.);
46892 let c = _mm512_set1_ps(-1.);
46893 let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46894 let e = _mm512_set1_ps(-0.99999994);
46895 assert_eq_m512(r, e);
46896 let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
46897 let e = _mm512_set1_ps(-0.9999999);
46898 assert_eq_m512(r, e);
46899 }
46900
46901 #[simd_test(enable = "avx512f")]
46902 unsafe fn test_mm512_mask_fmadd_round_ps() {
46903 let a = _mm512_set1_ps(0.00000007);
46904 let b = _mm512_set1_ps(1.);
46905 let c = _mm512_set1_ps(-1.);
46906 let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46907 a, 0, b, c,
46908 );
46909 assert_eq_m512(r, a);
46910 let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46911 a,
46912 0b00000000_11111111,
46913 b,
46914 c,
46915 );
46916 #[rustfmt::skip]
46917 let e = _mm512_setr_ps(
46918 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46919 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46920 0.00000007, 0.00000007, 0.00000007, 0.00000007,
46921 0.00000007, 0.00000007, 0.00000007, 0.00000007,
46922 );
46923 assert_eq_m512(r, e);
46924 }
46925
46926 #[simd_test(enable = "avx512f")]
46927 unsafe fn test_mm512_maskz_fmadd_round_ps() {
46928 let a = _mm512_set1_ps(0.00000007);
46929 let b = _mm512_set1_ps(1.);
46930 let c = _mm512_set1_ps(-1.);
46931 let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46932 0, a, b, c,
46933 );
46934 assert_eq_m512(r, _mm512_setzero_ps());
46935 #[rustfmt::skip]
46936 let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46937 0b00000000_11111111,
46938 a,
46939 b,
46940 c,
46941 );
46942 #[rustfmt::skip]
46943 let e = _mm512_setr_ps(
46944 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46945 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46946 0., 0., 0., 0.,
46947 0., 0., 0., 0.,
46948 );
46949 assert_eq_m512(r, e);
46950 }
46951
46952 #[simd_test(enable = "avx512f")]
46953 unsafe fn test_mm512_mask3_fmadd_round_ps() {
46954 let a = _mm512_set1_ps(0.00000007);
46955 let b = _mm512_set1_ps(1.);
46956 let c = _mm512_set1_ps(-1.);
46957 let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46958 a, b, c, 0,
46959 );
46960 assert_eq_m512(r, c);
46961 let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46962 a,
46963 b,
46964 c,
46965 0b00000000_11111111,
46966 );
46967 #[rustfmt::skip]
46968 let e = _mm512_setr_ps(
46969 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46970 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46971 -1., -1., -1., -1.,
46972 -1., -1., -1., -1.,
46973 );
46974 assert_eq_m512(r, e);
46975 }
46976
46977 #[simd_test(enable = "avx512f")]
46978 unsafe fn test_mm512_fmsub_round_ps() {
46979 let a = _mm512_set1_ps(0.00000007);
46980 let b = _mm512_set1_ps(1.);
46981 let c = _mm512_set1_ps(1.);
46982 let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46983 let e = _mm512_set1_ps(-0.99999994);
46984 assert_eq_m512(r, e);
46985 let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
46986 let e = _mm512_set1_ps(-0.9999999);
46987 assert_eq_m512(r, e);
46988 }
46989
46990 #[simd_test(enable = "avx512f")]
46991 unsafe fn test_mm512_mask_fmsub_round_ps() {
46992 let a = _mm512_set1_ps(0.00000007);
46993 let b = _mm512_set1_ps(1.);
46994 let c = _mm512_set1_ps(1.);
46995 let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46996 a, 0, b, c,
46997 );
46998 assert_eq_m512(r, a);
46999 let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47000 a,
47001 0b00000000_11111111,
47002 b,
47003 c,
47004 );
47005 #[rustfmt::skip]
47006 let e = _mm512_setr_ps(
47007 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
47008 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
47009 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47010 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47011 );
47012 assert_eq_m512(r, e);
47013 }
47014
47015 #[simd_test(enable = "avx512f")]
47016 unsafe fn test_mm512_maskz_fmsub_round_ps() {
47017 let a = _mm512_set1_ps(0.00000007);
47018 let b = _mm512_set1_ps(1.);
47019 let c = _mm512_set1_ps(1.);
47020 let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47021 0, a, b, c,
47022 );
47023 assert_eq_m512(r, _mm512_setzero_ps());
47024 let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47025 0b00000000_11111111,
47026 a,
47027 b,
47028 c,
47029 );
47030 #[rustfmt::skip]
47031 let e = _mm512_setr_ps(
47032 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
47033 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
47034 0., 0., 0., 0.,
47035 0., 0., 0., 0.,
47036 );
47037 assert_eq_m512(r, e);
47038 }
47039
47040 #[simd_test(enable = "avx512f")]
47041 unsafe fn test_mm512_mask3_fmsub_round_ps() {
47042 let a = _mm512_set1_ps(0.00000007);
47043 let b = _mm512_set1_ps(1.);
47044 let c = _mm512_set1_ps(1.);
47045 let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47046 a, b, c, 0,
47047 );
47048 assert_eq_m512(r, c);
47049 let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47050 a,
47051 b,
47052 c,
47053 0b00000000_11111111,
47054 );
47055 #[rustfmt::skip]
47056 let e = _mm512_setr_ps(
47057 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
47058 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
47059 1., 1., 1., 1.,
47060 1., 1., 1., 1.,
47061 );
47062 assert_eq_m512(r, e);
47063 }
47064
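    // Note: fmaddsub alternates per lane: even lanes compute a*b - c, odd lanes a*b + c, which
    // produces the alternating 1.0000001 / -0.99999994 pattern in the expected vectors below.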
47065 #[simd_test(enable = "avx512f")]
47066 unsafe fn test_mm512_fmaddsub_round_ps() {
47067 let a = _mm512_set1_ps(0.00000007);
47068 let b = _mm512_set1_ps(1.);
47069 let c = _mm512_set1_ps(-1.);
47070 let r =
47071 _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47072 #[rustfmt::skip]
47073 let e = _mm512_setr_ps(
47074 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47075 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47076 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47077 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47078 );
47079 assert_eq_m512(r, e);
47080 let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47081 let e = _mm512_setr_ps(
47082 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47083 -0.9999999, 1., -0.9999999, 1., -0.9999999,
47084 );
47085 assert_eq_m512(r, e);
47086 }
47087
47088 #[simd_test(enable = "avx512f")]
47089 unsafe fn test_mm512_mask_fmaddsub_round_ps() {
47090 let a = _mm512_set1_ps(0.00000007);
47091 let b = _mm512_set1_ps(1.);
47092 let c = _mm512_set1_ps(-1.);
47093 let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47094 a, 0, b, c,
47095 );
47096 assert_eq_m512(r, a);
47097 let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47098 a,
47099 0b00000000_11111111,
47100 b,
47101 c,
47102 );
47103 #[rustfmt::skip]
47104 let e = _mm512_setr_ps(
47105 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47106 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47107 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47108 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47109 );
47110 assert_eq_m512(r, e);
47111 }
47112
47113 #[simd_test(enable = "avx512f")]
47114 unsafe fn test_mm512_maskz_fmaddsub_round_ps() {
47115 let a = _mm512_set1_ps(0.00000007);
47116 let b = _mm512_set1_ps(1.);
47117 let c = _mm512_set1_ps(-1.);
47118 let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47119 0, a, b, c,
47120 );
47121 assert_eq_m512(r, _mm512_setzero_ps());
47122 let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47123 0b00000000_11111111,
47124 a,
47125 b,
47126 c,
47127 );
47128 #[rustfmt::skip]
47129 let e = _mm512_setr_ps(
47130 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47131 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47132 0., 0., 0., 0.,
47133 0., 0., 0., 0.,
47134 );
47135 assert_eq_m512(r, e);
47136 }
47137
47138 #[simd_test(enable = "avx512f")]
47139 unsafe fn test_mm512_mask3_fmaddsub_round_ps() {
47140 let a = _mm512_set1_ps(0.00000007);
47141 let b = _mm512_set1_ps(1.);
47142 let c = _mm512_set1_ps(-1.);
47143 let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47144 a, b, c, 0,
47145 );
47146 assert_eq_m512(r, c);
47147 let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47148 a,
47149 b,
47150 c,
47151 0b00000000_11111111,
47152 );
47153 #[rustfmt::skip]
47154 let e = _mm512_setr_ps(
47155 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47156 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47157 -1., -1., -1., -1.,
47158 -1., -1., -1., -1.,
47159 );
47160 assert_eq_m512(r, e);
47161 }
47162
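    // Note: fmsubadd is the mirror of fmaddsub: even lanes compute a*b + c, odd lanes a*b - c.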
47163 #[simd_test(enable = "avx512f")]
47164 unsafe fn test_mm512_fmsubadd_round_ps() {
47165 let a = _mm512_set1_ps(0.00000007);
47166 let b = _mm512_set1_ps(1.);
47167 let c = _mm512_set1_ps(-1.);
47168 let r =
47169 _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47170 #[rustfmt::skip]
47171 let e = _mm512_setr_ps(
47172 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47173 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47174 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47175 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47176 );
47177 assert_eq_m512(r, e);
47178 let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47179 let e = _mm512_setr_ps(
47180 -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47181 -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47182 );
47183 assert_eq_m512(r, e);
47184 }
47185
47186 #[simd_test(enable = "avx512f")]
47187 unsafe fn test_mm512_mask_fmsubadd_round_ps() {
47188 let a = _mm512_set1_ps(0.00000007);
47189 let b = _mm512_set1_ps(1.);
47190 let c = _mm512_set1_ps(-1.);
47191 let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47192 a, 0, b, c,
47193 );
47194 assert_eq_m512(r, a);
47195 let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47196 a,
47197 0b00000000_11111111,
47198 b,
47199 c,
47200 );
47201 #[rustfmt::skip]
47202 let e = _mm512_setr_ps(
47203 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47204 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47205 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47206 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47207 );
47208 assert_eq_m512(r, e);
47209 }
47210
47211 #[simd_test(enable = "avx512f")]
47212 unsafe fn test_mm512_maskz_fmsubadd_round_ps() {
47213 let a = _mm512_set1_ps(0.00000007);
47214 let b = _mm512_set1_ps(1.);
47215 let c = _mm512_set1_ps(-1.);
47216 let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47217 0, a, b, c,
47218 );
47219 assert_eq_m512(r, _mm512_setzero_ps());
47220 let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47221 0b00000000_11111111,
47222 a,
47223 b,
47224 c,
47225 );
47226 #[rustfmt::skip]
47227 let e = _mm512_setr_ps(
47228 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47229 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47230 0., 0., 0., 0.,
47231 0., 0., 0., 0.,
47232 );
47233 assert_eq_m512(r, e);
47234 }
47235
47236 #[simd_test(enable = "avx512f")]
47237 unsafe fn test_mm512_mask3_fmsubadd_round_ps() {
47238 let a = _mm512_set1_ps(0.00000007);
47239 let b = _mm512_set1_ps(1.);
47240 let c = _mm512_set1_ps(-1.);
47241 let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47242 a, b, c, 0,
47243 );
47244 assert_eq_m512(r, c);
47245 let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47246 a,
47247 b,
47248 c,
47249 0b00000000_11111111,
47250 );
47251 #[rustfmt::skip]
47252 let e = _mm512_setr_ps(
47253 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47254 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47255 -1., -1., -1., -1.,
47256 -1., -1., -1., -1.,
47257 );
47258 assert_eq_m512(r, e);
47259 }
47260
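    // Note: fnmadd computes -(a*b) + c; -(0.00000007) + 1.0 rounds to 0.99999994 (nearest) or
    // 0.9999999 (toward zero).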
47261 #[simd_test(enable = "avx512f")]
47262 unsafe fn test_mm512_fnmadd_round_ps() {
47263 let a = _mm512_set1_ps(0.00000007);
47264 let b = _mm512_set1_ps(1.);
47265 let c = _mm512_set1_ps(1.);
47266 let r =
47267 _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47268 let e = _mm512_set1_ps(0.99999994);
47269 assert_eq_m512(r, e);
47270 let r = _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47271 let e = _mm512_set1_ps(0.9999999);
47272 assert_eq_m512(r, e);
47273 }
47274
47275 #[simd_test(enable = "avx512f")]
47276 unsafe fn test_mm512_mask_fnmadd_round_ps() {
47277 let a = _mm512_set1_ps(0.00000007);
47278 let b = _mm512_set1_ps(1.);
47279 let c = _mm512_set1_ps(1.);
47280 let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47281 a, 0, b, c,
47282 );
47283 assert_eq_m512(r, a);
47284 let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47285 a,
47286 0b00000000_11111111,
47287 b,
47288 c,
47289 );
47290 let e = _mm512_setr_ps(
47291 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47292 0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47293 0.00000007, 0.00000007,
47294 );
47295 assert_eq_m512(r, e);
47296 }
47297
47298 #[simd_test(enable = "avx512f")]
47299 unsafe fn test_mm512_maskz_fnmadd_round_ps() {
47300 let a = _mm512_set1_ps(0.00000007);
47301 let b = _mm512_set1_ps(1.);
47302 let c = _mm512_set1_ps(1.);
47303 let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47304 0, a, b, c,
47305 );
47306 assert_eq_m512(r, _mm512_setzero_ps());
47307 let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47308 0b00000000_11111111,
47309 a,
47310 b,
47311 c,
47312 );
47313 let e = _mm512_setr_ps(
47314 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47315 0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
47316 );
47317 assert_eq_m512(r, e);
47318 }
47319
47320 #[simd_test(enable = "avx512f")]
47321 unsafe fn test_mm512_mask3_fnmadd_round_ps() {
47322 let a = _mm512_set1_ps(0.00000007);
47323 let b = _mm512_set1_ps(1.);
47324 let c = _mm512_set1_ps(1.);
47325 let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47326 a, b, c, 0,
47327 );
47328 assert_eq_m512(r, c);
47329 let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47330 a,
47331 b,
47332 c,
47333 0b00000000_11111111,
47334 );
47335 let e = _mm512_setr_ps(
47336 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47337 0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
47338 );
47339 assert_eq_m512(r, e);
47340 }
47341
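    // Note: fnmsub computes -(a*b) - c; with c == -1.0 the results match the fnmadd tests above.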
47342 #[simd_test(enable = "avx512f")]
47343 unsafe fn test_mm512_fnmsub_round_ps() {
47344 let a = _mm512_set1_ps(0.00000007);
47345 let b = _mm512_set1_ps(1.);
47346 let c = _mm512_set1_ps(-1.);
47347 let r =
47348 _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47349 let e = _mm512_set1_ps(0.99999994);
47350 assert_eq_m512(r, e);
47351 let r = _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47352 let e = _mm512_set1_ps(0.9999999);
47353 assert_eq_m512(r, e);
47354 }
47355
47356 #[simd_test(enable = "avx512f")]
47357 unsafe fn test_mm512_mask_fnmsub_round_ps() {
47358 let a = _mm512_set1_ps(0.00000007);
47359 let b = _mm512_set1_ps(1.);
47360 let c = _mm512_set1_ps(-1.);
47361 let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47362 a, 0, b, c,
47363 );
47364 assert_eq_m512(r, a);
47365 let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47366 a,
47367 0b00000000_11111111,
47368 b,
47369 c,
47370 );
47371 let e = _mm512_setr_ps(
47372 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47373 0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47374 0.00000007, 0.00000007,
47375 );
47376 assert_eq_m512(r, e);
47377 }
47378
47379 #[simd_test(enable = "avx512f")]
47380 unsafe fn test_mm512_maskz_fnmsub_round_ps() {
47381 let a = _mm512_set1_ps(0.00000007);
47382 let b = _mm512_set1_ps(1.);
47383 let c = _mm512_set1_ps(-1.);
47384 let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47385 0, a, b, c,
47386 );
47387 assert_eq_m512(r, _mm512_setzero_ps());
47388 let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47389 0b00000000_11111111,
47390 a,
47391 b,
47392 c,
47393 );
47394 let e = _mm512_setr_ps(
47395 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47396 0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
47397 );
47398 assert_eq_m512(r, e);
47399 }
47400
47401 #[simd_test(enable = "avx512f")]
47402 unsafe fn test_mm512_mask3_fnmsub_round_ps() {
47403 let a = _mm512_set1_ps(0.00000007);
47404 let b = _mm512_set1_ps(1.);
47405 let c = _mm512_set1_ps(-1.);
47406 let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47407 a, b, c, 0,
47408 );
47409 assert_eq_m512(r, c);
47410 let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47411 a,
47412 b,
47413 c,
47414 0b00000000_11111111,
47415 );
47416 let e = _mm512_setr_ps(
47417 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47418 0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
47419 );
47420 assert_eq_m512(r, e);
47421 }
47422
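    // Note: max/min are exact, so the rounding portion of the immediate is irrelevant here;
    // _MM_FROUND_CUR_DIRECTION simply satisfies the SAE parameter of the *_round variants.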
47423 #[simd_test(enable = "avx512f")]
47424 unsafe fn test_mm512_max_round_ps() {
47425 let a = _mm512_setr_ps(
47426 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47427 );
47428 let b = _mm512_setr_ps(
47429 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47430 );
47431 let r = _mm512_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
47432 let e = _mm512_setr_ps(
47433 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
47434 );
47435 assert_eq_m512(r, e);
47436 }
47437
47438 #[simd_test(enable = "avx512f")]
47439 unsafe fn test_mm512_mask_max_round_ps() {
47440 let a = _mm512_setr_ps(
47441 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47442 );
47443 let b = _mm512_setr_ps(
47444 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47445 );
47446 let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
47447 assert_eq_m512(r, a);
47448 let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
47449 let e = _mm512_setr_ps(
47450 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
47451 );
47452 assert_eq_m512(r, e);
47453 }
47454
47455 #[simd_test(enable = "avx512f")]
47456 unsafe fn test_mm512_maskz_max_round_ps() {
47457 let a = _mm512_setr_ps(
47458 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47459 );
47460 let b = _mm512_setr_ps(
47461 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47462 );
47463 let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
47464 assert_eq_m512(r, _mm512_setzero_ps());
47465 let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
47466 let e = _mm512_setr_ps(
47467 15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
47468 );
47469 assert_eq_m512(r, e);
47470 }
47471
47472 #[simd_test(enable = "avx512f")]
47473 unsafe fn test_mm512_min_round_ps() {
47474 let a = _mm512_setr_ps(
47475 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47476 );
47477 let b = _mm512_setr_ps(
47478 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47479 );
47480 let r = _mm512_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
47481 let e = _mm512_setr_ps(
47482 0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
47483 );
47484 assert_eq_m512(r, e);
47485 }
47486
47487 #[simd_test(enable = "avx512f")]
47488 unsafe fn test_mm512_mask_min_round_ps() {
47489 let a = _mm512_setr_ps(
47490 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47491 );
47492 let b = _mm512_setr_ps(
47493 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47494 );
47495 let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
47496 assert_eq_m512(r, a);
47497 let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
47498 let e = _mm512_setr_ps(
47499 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47500 );
47501 assert_eq_m512(r, e);
47502 }
47503
47504 #[simd_test(enable = "avx512f")]
47505 unsafe fn test_mm512_maskz_min_round_ps() {
47506 let a = _mm512_setr_ps(
47507 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47508 );
47509 let b = _mm512_setr_ps(
47510 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47511 );
47512 let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
47513 assert_eq_m512(r, _mm512_setzero_ps());
47514 let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
47515 let e = _mm512_setr_ps(
47516 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
47517 );
47518 assert_eq_m512(r, e);
47519 }
47520
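    // Note: getexp returns the unbiased exponent, i.e. floor(log2(|a|)), as a float:
    // getexp(3.0) == 1.0.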
47521 #[simd_test(enable = "avx512f")]
47522 unsafe fn test_mm512_getexp_round_ps() {
47523 let a = _mm512_set1_ps(3.);
47524 let r = _mm512_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a);
47525 let e = _mm512_set1_ps(1.);
47526 assert_eq_m512(r, e);
47527 }
47528
47529 #[simd_test(enable = "avx512f")]
47530 unsafe fn test_mm512_mask_getexp_round_ps() {
47531 let a = _mm512_set1_ps(3.);
47532 let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a);
47533 assert_eq_m512(r, a);
47534 let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111_00000000, a);
47535 let e = _mm512_setr_ps(
47536 3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
47537 );
47538 assert_eq_m512(r, e);
47539 }
47540
47541 #[simd_test(enable = "avx512f")]
47542 unsafe fn test_mm512_maskz_getexp_round_ps() {
47543 let a = _mm512_set1_ps(3.);
47544 let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a);
47545 assert_eq_m512(r, _mm512_setzero_ps());
47546 let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b11111111_00000000, a);
47547 let e = _mm512_setr_ps(
47548 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
47549 );
47550 assert_eq_m512(r, e);
47551 }
47552
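    // Note: roundscale keeps IMM8[7:4] fractional bits; with IMM8 == 0 the value is rounded to
    // an integer, so 1.1 becomes 1.0.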
47553 #[simd_test(enable = "avx512f")]
47554 unsafe fn test_mm512_roundscale_round_ps() {
47555 let a = _mm512_set1_ps(1.1);
47556 let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a);
47557 let e = _mm512_set1_ps(1.0);
47558 assert_eq_m512(r, e);
47559 }
47560
47561 #[simd_test(enable = "avx512f")]
47562 unsafe fn test_mm512_mask_roundscale_round_ps() {
47563 let a = _mm512_set1_ps(1.1);
47564 let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a);
47565 let e = _mm512_set1_ps(1.1);
47566 assert_eq_m512(r, e);
47567 let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(
47568 a,
47569 0b11111111_11111111,
47570 a,
47571 );
47572 let e = _mm512_set1_ps(1.0);
47573 assert_eq_m512(r, e);
47574 }
47575
47576 #[simd_test(enable = "avx512f")]
47577 unsafe fn test_mm512_maskz_roundscale_round_ps() {
47578 let a = _mm512_set1_ps(1.1);
47579 let r = _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0, a);
47580 assert_eq_m512(r, _mm512_setzero_ps());
47581 let r =
47582 _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111_11111111, a);
47583 let e = _mm512_set1_ps(1.0);
47584 assert_eq_m512(r, e);
47585 }
47586
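    // Note: scalef computes a * 2^floor(b): 1.0 * 2^3 == 8.0.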
47587 #[simd_test(enable = "avx512f")]
47588 unsafe fn test_mm512_scalef_round_ps() {
47589 let a = _mm512_set1_ps(1.);
47590 let b = _mm512_set1_ps(3.);
47591 let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
47592 let e = _mm512_set1_ps(8.);
47593 assert_eq_m512(r, e);
47594 }
47595
47596 #[simd_test(enable = "avx512f")]
47597 unsafe fn test_mm512_mask_scalef_round_ps() {
47598 let a = _mm512_set1_ps(1.);
47599 let b = _mm512_set1_ps(3.);
47600 let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47601 a, 0, a, b,
47602 );
47603 assert_eq_m512(r, a);
47604 let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47605 a,
47606 0b11111111_00000000,
47607 a,
47608 b,
47609 );
47610 let e = _mm512_set_ps(
47611 8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
47612 );
47613 assert_eq_m512(r, e);
47614 }
47615
47616 #[simd_test(enable = "avx512f")]
47617 unsafe fn test_mm512_maskz_scalef_round_ps() {
47618 let a = _mm512_set1_ps(1.);
47619 let b = _mm512_set1_ps(3.);
47620 let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47621 0, a, b,
47622 );
47623 assert_eq_m512(r, _mm512_setzero_ps());
47624 let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47625 0b11111111_00000000,
47626 a,
47627 b,
47628 );
47629 let e = _mm512_set_ps(
47630 8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
47631 );
47632 assert_eq_m512(r, e);
47633 }
47634
47635 #[simd_test(enable = "avx512f")]
47636 unsafe fn test_mm512_fixupimm_round_ps() {
47637 let a = _mm512_set1_ps(f32::NAN);
47638 let b = _mm512_set1_ps(f32::MAX);
47639 let c = _mm512_set1_epi32(i32::MAX);
47640 let r = _mm512_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
47641 let e = _mm512_set1_ps(0.0);
47642 assert_eq_m512(r, e);
47643 }
47644
47645 #[simd_test(enable = "avx512f")]
47646 unsafe fn test_mm512_mask_fixupimm_round_ps() {
47647 #[rustfmt::skip]
47648 let a = _mm512_set_ps(
47649 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47650 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47651 1., 1., 1., 1.,
47652 1., 1., 1., 1.,
47653 );
47654 let b = _mm512_set1_ps(f32::MAX);
47655 let c = _mm512_set1_epi32(i32::MAX);
47656 let r = _mm512_mask_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
47657 a,
47658 0b11111111_00000000,
47659 b,
47660 c,
47661 );
47662 let e = _mm512_set_ps(
47663 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
47664 );
47665 assert_eq_m512(r, e);
47666 }
47667
47668 #[simd_test(enable = "avx512f")]
47669 unsafe fn test_mm512_maskz_fixupimm_round_ps() {
47670 #[rustfmt::skip]
47671 let a = _mm512_set_ps(
47672 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47673 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47674 1., 1., 1., 1.,
47675 1., 1., 1., 1.,
47676 );
47677 let b = _mm512_set1_ps(f32::MAX);
47678 let c = _mm512_set1_epi32(i32::MAX);
47679 let r = _mm512_maskz_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
47680 0b11111111_00000000,
47681 a,
47682 b,
47683 c,
47684 );
47685 let e = _mm512_set_ps(
47686 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
47687 );
47688 assert_eq_m512(r, e);
47689 }
47690
47691 #[simd_test(enable = "avx512f")]
47692 unsafe fn test_mm512_getmant_round_ps() {
47693 let a = _mm512_set1_ps(10.);
47694 let r = _mm512_getmant_round_ps::<
47695 _MM_MANT_NORM_1_2,
47696 _MM_MANT_SIGN_SRC,
47697 _MM_FROUND_CUR_DIRECTION,
47698 >(a);
47699 let e = _mm512_set1_ps(1.25);
47700 assert_eq_m512(r, e);
47701 }
47702
47703 #[simd_test(enable = "avx512f")]
47704 unsafe fn test_mm512_mask_getmant_round_ps() {
47705 let a = _mm512_set1_ps(10.);
47706 let r = _mm512_mask_getmant_round_ps::<
47707 _MM_MANT_NORM_1_2,
47708 _MM_MANT_SIGN_SRC,
47709 _MM_FROUND_CUR_DIRECTION,
47710 >(a, 0, a);
47711 assert_eq_m512(r, a);
47712 let r = _mm512_mask_getmant_round_ps::<
47713 _MM_MANT_NORM_1_2,
47714 _MM_MANT_SIGN_SRC,
47715 _MM_FROUND_CUR_DIRECTION,
47716 >(a, 0b11111111_00000000, a);
47717 let e = _mm512_setr_ps(
47718 10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
47719 );
47720 assert_eq_m512(r, e);
47721 }
47722
47723 #[simd_test(enable = "avx512f")]
47724 unsafe fn test_mm512_maskz_getmant_round_ps() {
47725 let a = _mm512_set1_ps(10.);
47726 let r = _mm512_maskz_getmant_round_ps::<
47727 _MM_MANT_NORM_1_2,
47728 _MM_MANT_SIGN_SRC,
47729 _MM_FROUND_CUR_DIRECTION,
47730 >(0, a);
47731 assert_eq_m512(r, _mm512_setzero_ps());
47732 let r = _mm512_maskz_getmant_round_ps::<
47733 _MM_MANT_NORM_1_2,
47734 _MM_MANT_SIGN_SRC,
47735 _MM_FROUND_CUR_DIRECTION,
47736 >(0b11111111_00000000, a);
47737 let e = _mm512_setr_ps(
47738 0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
47739 );
47740 assert_eq_m512(r, e);
47741 }
47742
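    // Note: cvtps_epi32 uses the current MXCSR rounding mode (round-to-nearest-even by
    // default), hence -3.5 -> -4 and 13.5 -> 14 below.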
47743 #[simd_test(enable = "avx512f")]
47744 unsafe fn test_mm512_cvtps_epi32() {
47745 let a = _mm512_setr_ps(
47746 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47747 );
47748 let r = _mm512_cvtps_epi32(a);
47749 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47750 assert_eq_m512i(r, e);
47751 }
47752
47753 #[simd_test(enable = "avx512f")]
47754 unsafe fn test_mm512_mask_cvtps_epi32() {
47755 let a = _mm512_setr_ps(
47756 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47757 );
47758 let src = _mm512_set1_epi32(0);
47759 let r = _mm512_mask_cvtps_epi32(src, 0, a);
47760 assert_eq_m512i(r, src);
47761 let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
47762 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
47763 assert_eq_m512i(r, e);
47764 }
47765
47766 #[simd_test(enable = "avx512f")]
47767 unsafe fn test_mm512_maskz_cvtps_epi32() {
47768 let a = _mm512_setr_ps(
47769 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47770 );
47771 let r = _mm512_maskz_cvtps_epi32(0, a);
47772 assert_eq_m512i(r, _mm512_setzero_si512());
47773 let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);
47774 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
47775 assert_eq_m512i(r, e);
47776 }
47777
47778 #[simd_test(enable = "avx512f,avx512vl")]
47779 unsafe fn test_mm256_mask_cvtps_epi32() {
47780 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47781 let src = _mm256_set1_epi32(0);
47782 let r = _mm256_mask_cvtps_epi32(src, 0, a);
47783 assert_eq_m256i(r, src);
47784 let r = _mm256_mask_cvtps_epi32(src, 0b11111111, a);
47785 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47786 assert_eq_m256i(r, e);
47787 }
47788
47789 #[simd_test(enable = "avx512f,avx512vl")]
47790 unsafe fn test_mm256_maskz_cvtps_epi32() {
47791 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47792 let r = _mm256_maskz_cvtps_epi32(0, a);
47793 assert_eq_m256i(r, _mm256_setzero_si256());
47794 let r = _mm256_maskz_cvtps_epi32(0b11111111, a);
47795 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47796 assert_eq_m256i(r, e);
47797 }
47798
47799 #[simd_test(enable = "avx512f,avx512vl")]
47800 unsafe fn test_mm_mask_cvtps_epi32() {
47801 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47802 let src = _mm_set1_epi32(0);
47803 let r = _mm_mask_cvtps_epi32(src, 0, a);
47804 assert_eq_m128i(r, src);
47805 let r = _mm_mask_cvtps_epi32(src, 0b00001111, a);
47806 let e = _mm_set_epi32(12, 14, 14, 16);
47807 assert_eq_m128i(r, e);
47808 }
47809
47810 #[simd_test(enable = "avx512f,avx512vl")]
47811 unsafe fn test_mm_maskz_cvtps_epi32() {
47812 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47813 let r = _mm_maskz_cvtps_epi32(0, a);
47814 assert_eq_m128i(r, _mm_setzero_si128());
47815 let r = _mm_maskz_cvtps_epi32(0b00001111, a);
47816 let e = _mm_set_epi32(12, 14, 14, 16);
47817 assert_eq_m128i(r, e);
47818 }
47819
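    // Note: for the unsigned conversion, negative inputs are out of range and produce the
    // all-ones integer indefinite value (0xFFFF_FFFF), which the signed expected vectors below
    // show as -1.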
47820 #[simd_test(enable = "avx512f")]
47821 unsafe fn test_mm512_cvtps_epu32() {
47822 let a = _mm512_setr_ps(
47823 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47824 );
47825 let r = _mm512_cvtps_epu32(a);
47826 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
47827 assert_eq_m512i(r, e);
47828 }
47829
47830 #[simd_test(enable = "avx512f")]
47831 unsafe fn test_mm512_mask_cvtps_epu32() {
47832 let a = _mm512_setr_ps(
47833 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47834 );
47835 let src = _mm512_set1_epi32(0);
47836 let r = _mm512_mask_cvtps_epu32(src, 0, a);
47837 assert_eq_m512i(r, src);
47838 let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a);
47839 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47840 assert_eq_m512i(r, e);
47841 }
47842
47843 #[simd_test(enable = "avx512f")]
47844 unsafe fn test_mm512_maskz_cvtps_epu32() {
47845 let a = _mm512_setr_ps(
47846 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47847 );
47848 let r = _mm512_maskz_cvtps_epu32(0, a);
47849 assert_eq_m512i(r, _mm512_setzero_si512());
47850 let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
47851 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47852 assert_eq_m512i(r, e);
47853 }
47854
47855 #[simd_test(enable = "avx512f,avx512vl")]
47856 unsafe fn test_mm256_cvtps_epu32() {
47857 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47858 let r = _mm256_cvtps_epu32(a);
47859 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47860 assert_eq_m256i(r, e);
47861 }
47862
47863 #[simd_test(enable = "avx512f,avx512vl")]
47864 unsafe fn test_mm256_mask_cvtps_epu32() {
47865 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47866 let src = _mm256_set1_epi32(0);
47867 let r = _mm256_mask_cvtps_epu32(src, 0, a);
47868 assert_eq_m256i(r, src);
47869 let r = _mm256_mask_cvtps_epu32(src, 0b11111111, a);
47870 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47871 assert_eq_m256i(r, e);
47872 }
47873
47874 #[simd_test(enable = "avx512f,avx512vl")]
47875 unsafe fn test_mm256_maskz_cvtps_epu32() {
47876 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47877 let r = _mm256_maskz_cvtps_epu32(0, a);
47878 assert_eq_m256i(r, _mm256_setzero_si256());
47879 let r = _mm256_maskz_cvtps_epu32(0b11111111, a);
47880 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47881 assert_eq_m256i(r, e);
47882 }
47883
47884 #[simd_test(enable = "avx512f,avx512vl")]
47885 unsafe fn test_mm_cvtps_epu32() {
47886 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47887 let r = _mm_cvtps_epu32(a);
47888 let e = _mm_set_epi32(12, 14, 14, 16);
47889 assert_eq_m128i(r, e);
47890 }
47891
47892 #[simd_test(enable = "avx512f,avx512vl")]
47893 unsafe fn test_mm_mask_cvtps_epu32() {
47894 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47895 let src = _mm_set1_epi32(0);
47896 let r = _mm_mask_cvtps_epu32(src, 0, a);
47897 assert_eq_m128i(r, src);
47898 let r = _mm_mask_cvtps_epu32(src, 0b00001111, a);
47899 let e = _mm_set_epi32(12, 14, 14, 16);
47900 assert_eq_m128i(r, e);
47901 }
47902
47903 #[simd_test(enable = "avx512f,avx512vl")]
47904 unsafe fn test_mm_maskz_cvtps_epu32() {
47905 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47906 let r = _mm_maskz_cvtps_epu32(0, a);
47907 assert_eq_m128i(r, _mm_setzero_si128());
47908 let r = _mm_maskz_cvtps_epu32(0b00001111, a);
47909 let e = _mm_set_epi32(12, 14, 14, 16);
47910 assert_eq_m128i(r, e);
47911 }
47912
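    // Note: cvtepi8_epi32 sign-extends each of the 16 bytes to 32 bits; all inputs here are
    // non-negative, so the results match the zero-extending cvtepu8_epi32 tests further down.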
47913 #[simd_test(enable = "avx512f")]
47914 unsafe fn test_mm512_cvtepi8_epi32() {
47915 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47916 let r = _mm512_cvtepi8_epi32(a);
47917 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47918 assert_eq_m512i(r, e);
47919 }
47920
47921 #[simd_test(enable = "avx512f")]
47922 unsafe fn test_mm512_mask_cvtepi8_epi32() {
47923 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47924 let src = _mm512_set1_epi32(-1);
47925 let r = _mm512_mask_cvtepi8_epi32(src, 0, a);
47926 assert_eq_m512i(r, src);
47927 let r = _mm512_mask_cvtepi8_epi32(src, 0b00000000_11111111, a);
47928 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47929 assert_eq_m512i(r, e);
47930 }
47931
47932 #[simd_test(enable = "avx512f")]
47933 unsafe fn test_mm512_maskz_cvtepi8_epi32() {
47934 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47935 let r = _mm512_maskz_cvtepi8_epi32(0, a);
47936 assert_eq_m512i(r, _mm512_setzero_si512());
47937 let r = _mm512_maskz_cvtepi8_epi32(0b00000000_11111111, a);
47938 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
47939 assert_eq_m512i(r, e);
47940 }
47941
47942 #[simd_test(enable = "avx512f,avx512vl")]
47943 unsafe fn test_mm256_mask_cvtepi8_epi32() {
47944 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47945 let src = _mm256_set1_epi32(-1);
47946 let r = _mm256_mask_cvtepi8_epi32(src, 0, a);
47947 assert_eq_m256i(r, src);
47948 let r = _mm256_mask_cvtepi8_epi32(src, 0b11111111, a);
47949 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47950 assert_eq_m256i(r, e);
47951 }
47952
47953 #[simd_test(enable = "avx512f,avx512vl")]
47954 unsafe fn test_mm256_maskz_cvtepi8_epi32() {
47955 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47956 let r = _mm256_maskz_cvtepi8_epi32(0, a);
47957 assert_eq_m256i(r, _mm256_setzero_si256());
47958 let r = _mm256_maskz_cvtepi8_epi32(0b11111111, a);
47959 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47960 assert_eq_m256i(r, e);
47961 }
47962
47963 #[simd_test(enable = "avx512f,avx512vl")]
47964 unsafe fn test_mm_mask_cvtepi8_epi32() {
47965 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47966 let src = _mm_set1_epi32(-1);
47967 let r = _mm_mask_cvtepi8_epi32(src, 0, a);
47968 assert_eq_m128i(r, src);
47969 let r = _mm_mask_cvtepi8_epi32(src, 0b00001111, a);
47970 let e = _mm_set_epi32(12, 13, 14, 15);
47971 assert_eq_m128i(r, e);
47972 }
47973
47974 #[simd_test(enable = "avx512f,avx512vl")]
47975 unsafe fn test_mm_maskz_cvtepi8_epi32() {
47976 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47977 let r = _mm_maskz_cvtepi8_epi32(0, a);
47978 assert_eq_m128i(r, _mm_setzero_si128());
47979 let r = _mm_maskz_cvtepi8_epi32(0b00001111, a);
47980 let e = _mm_set_epi32(12, 13, 14, 15);
47981 assert_eq_m128i(r, e);
47982 }
47983
47984 #[simd_test(enable = "avx512f")]
47985 unsafe fn test_mm512_cvtepu8_epi32() {
47986 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47987 let r = _mm512_cvtepu8_epi32(a);
47988 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47989 assert_eq_m512i(r, e);
47990 }
47991
47992 #[simd_test(enable = "avx512f")]
47993 unsafe fn test_mm512_mask_cvtepu8_epi32() {
47994 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47995 let src = _mm512_set1_epi32(-1);
47996 let r = _mm512_mask_cvtepu8_epi32(src, 0, a);
47997 assert_eq_m512i(r, src);
47998 let r = _mm512_mask_cvtepu8_epi32(src, 0b00000000_11111111, a);
47999 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48000 assert_eq_m512i(r, e);
48001 }
48002
48003 #[simd_test(enable = "avx512f")]
48004 unsafe fn test_mm512_maskz_cvtepu8_epi32() {
48005 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48006 let r = _mm512_maskz_cvtepu8_epi32(0, a);
48007 assert_eq_m512i(r, _mm512_setzero_si512());
48008 let r = _mm512_maskz_cvtepu8_epi32(0b00000000_11111111, a);
48009 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48010 assert_eq_m512i(r, e);
48011 }
48012
48013 #[simd_test(enable = "avx512f,avx512vl")]
48014 unsafe fn test_mm256_mask_cvtepu8_epi32() {
48015 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48016 let src = _mm256_set1_epi32(-1);
48017 let r = _mm256_mask_cvtepu8_epi32(src, 0, a);
48018 assert_eq_m256i(r, src);
48019 let r = _mm256_mask_cvtepu8_epi32(src, 0b11111111, a);
48020 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48021 assert_eq_m256i(r, e);
48022 }
48023
48024 #[simd_test(enable = "avx512f,avx512vl")]
48025 unsafe fn test_mm256_maskz_cvtepu8_epi32() {
48026 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48027 let r = _mm256_maskz_cvtepu8_epi32(0, a);
48028 assert_eq_m256i(r, _mm256_setzero_si256());
48029 let r = _mm256_maskz_cvtepu8_epi32(0b11111111, a);
48030 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48031 assert_eq_m256i(r, e);
48032 }
48033
48034 #[simd_test(enable = "avx512f,avx512vl")]
48035 unsafe fn test_mm_mask_cvtepu8_epi32() {
48036 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48037 let src = _mm_set1_epi32(-1);
48038 let r = _mm_mask_cvtepu8_epi32(src, 0, a);
48039 assert_eq_m128i(r, src);
48040 let r = _mm_mask_cvtepu8_epi32(src, 0b00001111, a);
48041 let e = _mm_set_epi32(12, 13, 14, 15);
48042 assert_eq_m128i(r, e);
48043 }
48044
48045 #[simd_test(enable = "avx512f,avx512vl")]
48046 unsafe fn test_mm_maskz_cvtepu8_epi32() {
48047 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48048 let r = _mm_maskz_cvtepu8_epi32(0, a);
48049 assert_eq_m128i(r, _mm_setzero_si128());
48050 let r = _mm_maskz_cvtepu8_epi32(0b00001111, a);
48051 let e = _mm_set_epi32(12, 13, 14, 15);
48052 assert_eq_m128i(r, e);
48053 }
48054
48055 #[simd_test(enable = "avx512f")]
48056 unsafe fn test_mm512_cvtepi16_epi32() {
48057 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48058 let r = _mm512_cvtepi16_epi32(a);
48059 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48060 assert_eq_m512i(r, e);
48061 }
48062
48063 #[simd_test(enable = "avx512f")]
48064 unsafe fn test_mm512_mask_cvtepi16_epi32() {
48065 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48066 let src = _mm512_set1_epi32(-1);
48067 let r = _mm512_mask_cvtepi16_epi32(src, 0, a);
48068 assert_eq_m512i(r, src);
48069 let r = _mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a);
48070 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48071 assert_eq_m512i(r, e);
48072 }
48073
48074 #[simd_test(enable = "avx512f")]
48075 unsafe fn test_mm512_maskz_cvtepi16_epi32() {
48076 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48077 let r = _mm512_maskz_cvtepi16_epi32(0, a);
48078 assert_eq_m512i(r, _mm512_setzero_si512());
48079 let r = _mm512_maskz_cvtepi16_epi32(0b00000000_11111111, a);
48080 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48081 assert_eq_m512i(r, e);
48082 }
48083
48084 #[simd_test(enable = "avx512f,avx512vl")]
48085 unsafe fn test_mm256_mask_cvtepi16_epi32() {
48086 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48087 let src = _mm256_set1_epi32(-1);
48088 let r = _mm256_mask_cvtepi16_epi32(src, 0, a);
48089 assert_eq_m256i(r, src);
48090 let r = _mm256_mask_cvtepi16_epi32(src, 0b11111111, a);
48091 let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48092 assert_eq_m256i(r, e);
48093 }
48094
48095 #[simd_test(enable = "avx512f,avx512vl")]
48096 unsafe fn test_mm256_maskz_cvtepi16_epi32() {
48097 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48098 let r = _mm256_maskz_cvtepi16_epi32(0, a);
48099 assert_eq_m256i(r, _mm256_setzero_si256());
48100 let r = _mm256_maskz_cvtepi16_epi32(0b11111111, a);
48101 let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48102 assert_eq_m256i(r, e);
48103 }
48104
48105 #[simd_test(enable = "avx512f,avx512vl")]
48106 unsafe fn test_mm_mask_cvtepi16_epi32() {
48107 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48108 let src = _mm_set1_epi32(-1);
48109 let r = _mm_mask_cvtepi16_epi32(src, 0, a);
48110 assert_eq_m128i(r, src);
48111 let r = _mm_mask_cvtepi16_epi32(src, 0b00001111, a);
48112 let e = _mm_set_epi32(4, 5, 6, 7);
48113 assert_eq_m128i(r, e);
48114 }
48115
48116 #[simd_test(enable = "avx512f,avx512vl")]
48117 unsafe fn test_mm_maskz_cvtepi16_epi32() {
48118 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48119 let r = _mm_maskz_cvtepi16_epi32(0, a);
48120 assert_eq_m128i(r, _mm_setzero_si128());
48121 let r = _mm_maskz_cvtepi16_epi32(0b00001111, a);
48122 let e = _mm_set_epi32(4, 5, 6, 7);
48123 assert_eq_m128i(r, e);
48124 }
48125
48126 #[simd_test(enable = "avx512f")]
48127 unsafe fn test_mm512_cvtepu16_epi32() {
48128 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48129 let r = _mm512_cvtepu16_epi32(a);
48130 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48131 assert_eq_m512i(r, e);
48132 }
48133
48134 #[simd_test(enable = "avx512f")]
48135 unsafe fn test_mm512_mask_cvtepu16_epi32() {
48136 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48137 let src = _mm512_set1_epi32(-1);
48138 let r = _mm512_mask_cvtepu16_epi32(src, 0, a);
48139 assert_eq_m512i(r, src);
48140 let r = _mm512_mask_cvtepu16_epi32(src, 0b00000000_11111111, a);
48141 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48142 assert_eq_m512i(r, e);
48143 }
48144
48145 #[simd_test(enable = "avx512f")]
48146 unsafe fn test_mm512_maskz_cvtepu16_epi32() {
48147 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48148 let r = _mm512_maskz_cvtepu16_epi32(0, a);
48149 assert_eq_m512i(r, _mm512_setzero_si512());
48150 let r = _mm512_maskz_cvtepu16_epi32(0b00000000_11111111, a);
48151 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48152 assert_eq_m512i(r, e);
48153 }
48154
48155 #[simd_test(enable = "avx512f,avx512vl")]
48156 unsafe fn test_mm256_mask_cvtepu16_epi32() {
48157 let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48158 let src = _mm256_set1_epi32(-1);
48159 let r = _mm256_mask_cvtepu16_epi32(src, 0, a);
48160 assert_eq_m256i(r, src);
48161 let r = _mm256_mask_cvtepu16_epi32(src, 0b11111111, a);
48162 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48163 assert_eq_m256i(r, e);
48164 }
48165
48166 #[simd_test(enable = "avx512f,avx512vl")]
48167 unsafe fn test_mm256_maskz_cvtepu16_epi32() {
48168 let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48169 let r = _mm256_maskz_cvtepu16_epi32(0, a);
48170 assert_eq_m256i(r, _mm256_setzero_si256());
48171 let r = _mm256_maskz_cvtepu16_epi32(0b11111111, a);
48172 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48173 assert_eq_m256i(r, e);
48174 }
48175
48176 #[simd_test(enable = "avx512f,avx512vl")]
48177 unsafe fn test_mm_mask_cvtepu16_epi32() {
48178 let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48179 let src = _mm_set1_epi32(-1);
48180 let r = _mm_mask_cvtepu16_epi32(src, 0, a);
48181 assert_eq_m128i(r, src);
48182 let r = _mm_mask_cvtepu16_epi32(src, 0b00001111, a);
48183 let e = _mm_set_epi32(12, 13, 14, 15);
48184 assert_eq_m128i(r, e);
48185 }
48186
48187 #[simd_test(enable = "avx512f,avx512vl")]
48188 unsafe fn test_mm_maskz_cvtepu16_epi32() {
48189 let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48190 let r = _mm_maskz_cvtepu16_epi32(0, a);
48191 assert_eq_m128i(r, _mm_setzero_si128());
48192 let r = _mm_maskz_cvtepu16_epi32(0b00001111, a);
48193 let e = _mm_set_epi32(12, 13, 14, 15);
48194 assert_eq_m128i(r, e);
48195 }
48196
48197 #[simd_test(enable = "avx512f")]
48198 unsafe fn test_mm512_cvtepi32_ps() {
48199 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48200 let r = _mm512_cvtepi32_ps(a);
48201 let e = _mm512_set_ps(
48202 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
48203 );
48204 assert_eq_m512(r, e);
48205 }
48206
48207 #[simd_test(enable = "avx512f")]
48208 unsafe fn test_mm512_mask_cvtepi32_ps() {
48209 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48210 let src = _mm512_set1_ps(-1.);
48211 let r = _mm512_mask_cvtepi32_ps(src, 0, a);
48212 assert_eq_m512(r, src);
48213 let r = _mm512_mask_cvtepi32_ps(src, 0b00000000_11111111, a);
48214 let e = _mm512_set_ps(
48215 -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
48216 );
48217 assert_eq_m512(r, e);
48218 }
48219
48220 #[simd_test(enable = "avx512f")]
48221 unsafe fn test_mm512_maskz_cvtepi32_ps() {
48222 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48223 let r = _mm512_maskz_cvtepi32_ps(0, a);
48224 assert_eq_m512(r, _mm512_setzero_ps());
48225 let r = _mm512_maskz_cvtepi32_ps(0b00000000_11111111, a);
48226 let e = _mm512_set_ps(
48227 0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
48228 );
48229 assert_eq_m512(r, e);
48230 }
48231
48232 #[simd_test(enable = "avx512f,avx512vl")]
48233 unsafe fn test_mm256_mask_cvtepi32_ps() {
48234 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48235 let src = _mm256_set1_ps(-1.);
48236 let r = _mm256_mask_cvtepi32_ps(src, 0, a);
48237 assert_eq_m256(r, src);
48238 let r = _mm256_mask_cvtepi32_ps(src, 0b11111111, a);
48239 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
48240 assert_eq_m256(r, e);
48241 }
48242
48243 #[simd_test(enable = "avx512f,avx512vl")]
48244 unsafe fn test_mm256_maskz_cvtepi32_ps() {
48245 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48246 let r = _mm256_maskz_cvtepi32_ps(0, a);
48247 assert_eq_m256(r, _mm256_setzero_ps());
48248 let r = _mm256_maskz_cvtepi32_ps(0b11111111, a);
48249 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
48250 assert_eq_m256(r, e);
48251 }
48252
48253 #[simd_test(enable = "avx512f,avx512vl")]
48254 unsafe fn test_mm_mask_cvtepi32_ps() {
48255 let a = _mm_set_epi32(1, 2, 3, 4);
48256 let src = _mm_set1_ps(-1.);
48257 let r = _mm_mask_cvtepi32_ps(src, 0, a);
48258 assert_eq_m128(r, src);
48259 let r = _mm_mask_cvtepi32_ps(src, 0b00001111, a);
48260 let e = _mm_set_ps(1., 2., 3., 4.);
48261 assert_eq_m128(r, e);
48262 }
48263
48264 #[simd_test(enable = "avx512f,avx512vl")]
48265 unsafe fn test_mm_maskz_cvtepi32_ps() {
48266 let a = _mm_set_epi32(1, 2, 3, 4);
48267 let r = _mm_maskz_cvtepi32_ps(0, a);
48268 assert_eq_m128(r, _mm_setzero_ps());
48269 let r = _mm_maskz_cvtepi32_ps(0b00001111, a);
48270 let e = _mm_set_ps(1., 2., 3., 4.);
48271 assert_eq_m128(r, e);
48272 }
48273
48274 #[simd_test(enable = "avx512f")]
48275 unsafe fn test_mm512_cvtepu32_ps() {
48276 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48277 let r = _mm512_cvtepu32_ps(a);
48278 let e = _mm512_set_ps(
48279 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
48280 );
48281 assert_eq_m512(r, e);
48282 }
48283
48284 #[simd_test(enable = "avx512f")]
48285 unsafe fn test_mm512_mask_cvtepu32_ps() {
48286 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48287 let src = _mm512_set1_ps(-1.);
48288 let r = _mm512_mask_cvtepu32_ps(src, 0, a);
48289 assert_eq_m512(r, src);
48290 let r = _mm512_mask_cvtepu32_ps(src, 0b00000000_11111111, a);
48291 let e = _mm512_set_ps(
48292 -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
48293 );
48294 assert_eq_m512(r, e);
48295 }
48296
48297 #[simd_test(enable = "avx512f")]
48298 unsafe fn test_mm512_maskz_cvtepu32_ps() {
48299 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48300 let r = _mm512_maskz_cvtepu32_ps(0, a);
48301 assert_eq_m512(r, _mm512_setzero_ps());
48302 let r = _mm512_maskz_cvtepu32_ps(0b00000000_11111111, a);
48303 let e = _mm512_set_ps(
48304 0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
48305 );
48306 assert_eq_m512(r, e);
48307 }
48308
48309 #[simd_test(enable = "avx512f")]
48310 unsafe fn test_mm512_cvtepi32_epi16() {
48311 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48312 let r = _mm512_cvtepi32_epi16(a);
48313 let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48314 assert_eq_m256i(r, e);
48315 }
48316
48317 #[simd_test(enable = "avx512f")]
48318 unsafe fn test_mm512_mask_cvtepi32_epi16() {
48319 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48320 let src = _mm256_set1_epi16(-1);
48321 let r = _mm512_mask_cvtepi32_epi16(src, 0, a);
48322 assert_eq_m256i(r, src);
48323 let r = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a);
48324 let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48325 assert_eq_m256i(r, e);
48326 }
48327
48328 #[simd_test(enable = "avx512f")]
48329 unsafe fn test_mm512_maskz_cvtepi32_epi16() {
48330 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48331 let r = _mm512_maskz_cvtepi32_epi16(0, a);
48332 assert_eq_m256i(r, _mm256_setzero_si256());
48333 let r = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a);
48334 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48335 assert_eq_m256i(r, e);
48336 }
48337
48338 #[simd_test(enable = "avx512f,avx512vl")]
48339 unsafe fn test_mm256_cvtepi32_epi16() {
48340 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48341 let r = _mm256_cvtepi32_epi16(a);
48342 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48343 assert_eq_m128i(r, e);
48344 }
48345
48346 #[simd_test(enable = "avx512f,avx512vl")]
48347 unsafe fn test_mm256_mask_cvtepi32_epi16() {
48348 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48349 let src = _mm_set1_epi16(-1);
48350 let r = _mm256_mask_cvtepi32_epi16(src, 0, a);
48351 assert_eq_m128i(r, src);
48352 let r = _mm256_mask_cvtepi32_epi16(src, 0b11111111, a);
48353 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48354 assert_eq_m128i(r, e);
48355 }
48356
48357 #[simd_test(enable = "avx512f,avx512vl")]
48358 unsafe fn test_mm256_maskz_cvtepi32_epi16() {
48359 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48360 let r = _mm256_maskz_cvtepi32_epi16(0, a);
48361 assert_eq_m128i(r, _mm_setzero_si128());
48362 let r = _mm256_maskz_cvtepi32_epi16(0b11111111, a);
48363 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48364 assert_eq_m128i(r, e);
48365 }
48366
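    // When the narrowed result does not fill the 128-bit destination (four i32
    // down to four i16 here), the unused upper lanes of the result are zeroed,
    // which is why the expected vectors below contain leading zeros.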
48367 #[simd_test(enable = "avx512f,avx512vl")]
48368 unsafe fn test_mm_cvtepi32_epi16() {
48369 let a = _mm_set_epi32(4, 5, 6, 7);
48370 let r = _mm_cvtepi32_epi16(a);
48371 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48372 assert_eq_m128i(r, e);
48373 }
48374
48375 #[simd_test(enable = "avx512f,avx512vl")]
48376 unsafe fn test_mm_mask_cvtepi32_epi16() {
48377 let a = _mm_set_epi32(4, 5, 6, 7);
48378 let src = _mm_set1_epi16(0);
48379 let r = _mm_mask_cvtepi32_epi16(src, 0, a);
48380 assert_eq_m128i(r, src);
48381 let r = _mm_mask_cvtepi32_epi16(src, 0b00001111, a);
48382 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48383 assert_eq_m128i(r, e);
48384 }
48385
48386 #[simd_test(enable = "avx512f,avx512vl")]
48387 unsafe fn test_mm_maskz_cvtepi32_epi16() {
48388 let a = _mm_set_epi32(4, 5, 6, 7);
48389 let r = _mm_maskz_cvtepi32_epi16(0, a);
48390 assert_eq_m128i(r, _mm_setzero_si128());
48391 let r = _mm_maskz_cvtepi32_epi16(0b00001111, a);
48392 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48393 assert_eq_m128i(r, e);
48394 }
48395
48396 #[simd_test(enable = "avx512f")]
48397 unsafe fn test_mm512_cvtepi32_epi8() {
48398 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48399 let r = _mm512_cvtepi32_epi8(a);
48400 let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48401 assert_eq_m128i(r, e);
48402 }
48403
48404 #[simd_test(enable = "avx512f")]
48405 unsafe fn test_mm512_mask_cvtepi32_epi8() {
48406 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48407 let src = _mm_set1_epi8(-1);
48408 let r = _mm512_mask_cvtepi32_epi8(src, 0, a);
48409 assert_eq_m128i(r, src);
48410 let r = _mm512_mask_cvtepi32_epi8(src, 0b00000000_11111111, a);
48411 let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48412 assert_eq_m128i(r, e);
48413 }
48414
48415 #[simd_test(enable = "avx512f")]
48416 unsafe fn test_mm512_maskz_cvtepi32_epi8() {
48417 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48418 let r = _mm512_maskz_cvtepi32_epi8(0, a);
48419 assert_eq_m128i(r, _mm_setzero_si128());
48420 let r = _mm512_maskz_cvtepi32_epi8(0b00000000_11111111, a);
48421 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48422 assert_eq_m128i(r, e);
48423 }
48424
48425 #[simd_test(enable = "avx512f,avx512vl")]
48426 unsafe fn test_mm256_cvtepi32_epi8() {
48427 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48428 let r = _mm256_cvtepi32_epi8(a);
48429 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48430 assert_eq_m128i(r, e);
48431 }
48432
48433 #[simd_test(enable = "avx512f,avx512vl")]
48434 unsafe fn test_mm256_mask_cvtepi32_epi8() {
48435 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48436 let src = _mm_set1_epi8(0);
48437 let r = _mm256_mask_cvtepi32_epi8(src, 0, a);
48438 assert_eq_m128i(r, src);
48439 let r = _mm256_mask_cvtepi32_epi8(src, 0b11111111, a);
48440 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48441 assert_eq_m128i(r, e);
48442 }
48443
48444 #[simd_test(enable = "avx512f,avx512vl")]
48445 unsafe fn test_mm256_maskz_cvtepi32_epi8() {
48446 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48447 let r = _mm256_maskz_cvtepi32_epi8(0, a);
48448 assert_eq_m128i(r, _mm_setzero_si128());
48449 let r = _mm256_maskz_cvtepi32_epi8(0b11111111, a);
48450 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48451 assert_eq_m128i(r, e);
48452 }
48453
48454 #[simd_test(enable = "avx512f,avx512vl")]
48455 unsafe fn test_mm_cvtepi32_epi8() {
48456 let a = _mm_set_epi32(4, 5, 6, 7);
48457 let r = _mm_cvtepi32_epi8(a);
48458 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48459 assert_eq_m128i(r, e);
48460 }
48461
48462 #[simd_test(enable = "avx512f,avx512vl")]
48463 unsafe fn test_mm_mask_cvtepi32_epi8() {
48464 let a = _mm_set_epi32(4, 5, 6, 7);
48465 let src = _mm_set1_epi8(0);
48466 let r = _mm_mask_cvtepi32_epi8(src, 0, a);
48467 assert_eq_m128i(r, src);
48468 let r = _mm_mask_cvtepi32_epi8(src, 0b00001111, a);
48469 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48470 assert_eq_m128i(r, e);
48471 }
48472
48473 #[simd_test(enable = "avx512f,avx512vl")]
48474 unsafe fn test_mm_maskz_cvtepi32_epi8() {
48475 let a = _mm_set_epi32(4, 5, 6, 7);
48476 let r = _mm_maskz_cvtepi32_epi8(0, a);
48477 assert_eq_m128i(r, _mm_setzero_si128());
48478 let r = _mm_maskz_cvtepi32_epi8(0b00001111, a);
48479 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48480 assert_eq_m128i(r, e);
48481 }
48482
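    // cvtsepi32_epi16 narrows with *signed* saturation: i32::MIN / i32::MAX clamp
    // to i16::MIN / i16::MAX instead of being truncated.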
48483 #[simd_test(enable = "avx512f")]
48484 unsafe fn test_mm512_cvtsepi32_epi16() {
48485 #[rustfmt::skip]
48486 let a = _mm512_set_epi32(
48487 0, 1, 2, 3,
48488 4, 5, 6, 7,
48489 8, 9, 10, 11,
48490 12, 13, i32::MIN, i32::MAX,
48491 );
48492 let r = _mm512_cvtsepi32_epi16(a);
48493 #[rustfmt::skip]
48494 let e = _mm256_set_epi16(
48495 0, 1, 2, 3,
48496 4, 5, 6, 7,
48497 8, 9, 10, 11,
48498 12, 13, i16::MIN, i16::MAX,
48499 );
48500 assert_eq_m256i(r, e);
48501 }
48502
48503 #[simd_test(enable = "avx512f")]
48504 unsafe fn test_mm512_mask_cvtsepi32_epi16() {
48505 #[rustfmt::skip]
48506 let a = _mm512_set_epi32(
48507 0, 1, 2, 3,
48508 4, 5, 6, 7,
48509 8, 9, 10, 11,
48510 12, 13, i32::MIN, i32::MAX,
48511 );
48512 let src = _mm256_set1_epi16(-1);
48513 let r = _mm512_mask_cvtsepi32_epi16(src, 0, a);
48514 assert_eq_m256i(r, src);
48515 let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a);
48516 #[rustfmt::skip]
48517 let e = _mm256_set_epi16(
48518 -1, -1, -1, -1,
48519 -1, -1, -1, -1,
48520 8, 9, 10, 11,
48521 12, 13, i16::MIN, i16::MAX,
48522 );
48523 assert_eq_m256i(r, e);
48524 }
48525
48526 #[simd_test(enable = "avx512f")]
48527 unsafe fn test_mm512_maskz_cvtsepi32_epi16() {
48528 #[rustfmt::skip]
48529 let a = _mm512_set_epi32(
48530 0, 1, 2, 3,
48531 4, 5, 6, 7,
48532 8, 9, 10, 11,
48533 12, 13, i32::MIN, i32::MAX,
48534 );
48535 let r = _mm512_maskz_cvtsepi32_epi16(0, a);
48536 assert_eq_m256i(r, _mm256_setzero_si256());
48537 let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a);
48538 #[rustfmt::skip]
48539 let e = _mm256_set_epi16(
48540 0, 0, 0, 0,
48541 0, 0, 0, 0,
48542 8, 9, 10, 11,
48543 12, 13, i16::MIN, i16::MAX,
48544 );
48545 assert_eq_m256i(r, e);
48546 }
48547
48548 #[simd_test(enable = "avx512f,avx512vl")]
48549 unsafe fn test_mm256_cvtsepi32_epi16() {
48550 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48551 let r = _mm256_cvtsepi32_epi16(a);
48552 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48553 assert_eq_m128i(r, e);
48554 }
48555
48556 #[simd_test(enable = "avx512f,avx512vl")]
48557 unsafe fn test_mm256_mask_cvtsepi32_epi16() {
48558 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48559 let src = _mm_set1_epi16(-1);
48560 let r = _mm256_mask_cvtsepi32_epi16(src, 0, a);
48561 assert_eq_m128i(r, src);
48562 let r = _mm256_mask_cvtsepi32_epi16(src, 0b11111111, a);
48563 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48564 assert_eq_m128i(r, e);
48565 }
48566
48567 #[simd_test(enable = "avx512f,avx512vl")]
48568 unsafe fn test_mm256_maskz_cvtsepi32_epi16() {
48569 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48570 let r = _mm256_maskz_cvtsepi32_epi16(0, a);
48571 assert_eq_m128i(r, _mm_setzero_si128());
48572 let r = _mm256_maskz_cvtsepi32_epi16(0b11111111, a);
48573 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48574 assert_eq_m128i(r, e);
48575 }
48576
48577 #[simd_test(enable = "avx512f,avx512vl")]
48578 unsafe fn test_mm_cvtsepi32_epi16() {
48579 let a = _mm_set_epi32(4, 5, 6, 7);
48580 let r = _mm_cvtsepi32_epi16(a);
48581 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48582 assert_eq_m128i(r, e);
48583 }
48584
48585 #[simd_test(enable = "avx512f,avx512vl")]
48586 unsafe fn test_mm_mask_cvtsepi32_epi16() {
48587 let a = _mm_set_epi32(4, 5, 6, 7);
48588 let src = _mm_set1_epi16(0);
48589 let r = _mm_mask_cvtsepi32_epi16(src, 0, a);
48590 assert_eq_m128i(r, src);
48591 let r = _mm_mask_cvtsepi32_epi16(src, 0b11111111, a);
48592 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48593 assert_eq_m128i(r, e);
48594 }
48595
48596 #[simd_test(enable = "avx512f,avx512vl")]
48597 unsafe fn test_mm_maskz_cvtsepi32_epi16() {
48598 let a = _mm_set_epi32(4, 5, 6, 7);
48599 let r = _mm_maskz_cvtsepi32_epi16(0, a);
48600 assert_eq_m128i(r, _mm_setzero_si128());
48601 let r = _mm_maskz_cvtsepi32_epi16(0b11111111, a);
48602 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48603 assert_eq_m128i(r, e);
48604 }
48605
48606 #[simd_test(enable = "avx512f")]
48607 unsafe fn test_mm512_cvtsepi32_epi8() {
48608 #[rustfmt::skip]
48609 let a = _mm512_set_epi32(
48610 0, 1, 2, 3,
48611 4, 5, 6, 7,
48612 8, 9, 10, 11,
48613 12, 13, i32::MIN, i32::MAX,
48614 );
48615 let r = _mm512_cvtsepi32_epi8(a);
48616 #[rustfmt::skip]
48617 let e = _mm_set_epi8(
48618 0, 1, 2, 3,
48619 4, 5, 6, 7,
48620 8, 9, 10, 11,
48621 12, 13, i8::MIN, i8::MAX,
48622 );
48623 assert_eq_m128i(r, e);
48624 }
48625
48626 #[simd_test(enable = "avx512f")]
48627 unsafe fn test_mm512_mask_cvtsepi32_epi8() {
48628 #[rustfmt::skip]
48629 let a = _mm512_set_epi32(
48630 0, 1, 2, 3,
48631 4, 5, 6, 7,
48632 8, 9, 10, 11,
48633 12, 13, i32::MIN, i32::MAX,
48634 );
48635 let src = _mm_set1_epi8(-1);
48636 let r = _mm512_mask_cvtsepi32_epi8(src, 0, a);
48637 assert_eq_m128i(r, src);
48638 let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a);
48639 #[rustfmt::skip]
48640 let e = _mm_set_epi8(
48641 -1, -1, -1, -1,
48642 -1, -1, -1, -1,
48643 8, 9, 10, 11,
48644 12, 13, i8::MIN, i8::MAX,
48645 );
48646 assert_eq_m128i(r, e);
48647 }
48648
48649 #[simd_test(enable = "avx512f")]
48650 unsafe fn test_mm512_maskz_cvtsepi32_epi8() {
48651 #[rustfmt::skip]
48652 let a = _mm512_set_epi32(
48653 0, 1, 2, 3,
48654 4, 5, 6, 7,
48655 8, 9, 10, 11,
48656 12, 13, i32::MIN, i32::MAX,
48657 );
48658 let r = _mm512_maskz_cvtsepi32_epi8(0, a);
48659 assert_eq_m128i(r, _mm_setzero_si128());
48660 let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a);
48661 #[rustfmt::skip]
48662 let e = _mm_set_epi8(
48663 0, 0, 0, 0,
48664 0, 0, 0, 0,
48665 8, 9, 10, 11,
48666 12, 13, i8::MIN, i8::MAX,
48667 );
48668 assert_eq_m128i(r, e);
48669 }
48670
48671 #[simd_test(enable = "avx512f,avx512vl")]
48672 unsafe fn test_mm256_cvtsepi32_epi8() {
48673 let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48674 let r = _mm256_cvtsepi32_epi8(a);
48675 #[rustfmt::skip]
48676 let e = _mm_set_epi8(
48677 0, 0, 0, 0,
48678 0, 0, 0, 0,
48679 9, 10, 11, 12,
48680 13, 14, 15, 16,
48681 );
48682 assert_eq_m128i(r, e);
48683 }
48684
48685 #[simd_test(enable = "avx512f,avx512vl")]
48686 unsafe fn test_mm256_mask_cvtsepi32_epi8() {
48687 let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48688 let src = _mm_set1_epi8(0);
48689 let r = _mm256_mask_cvtsepi32_epi8(src, 0, a);
48690 assert_eq_m128i(r, src);
48691 let r = _mm256_mask_cvtsepi32_epi8(src, 0b11111111, a);
48692 #[rustfmt::skip]
48693 let e = _mm_set_epi8(
48694 0, 0, 0, 0,
48695 0, 0, 0, 0,
48696 9, 10, 11, 12,
48697 13, 14, 15, 16,
48698 );
48699 assert_eq_m128i(r, e);
48700 }
48701
48702 #[simd_test(enable = "avx512f,avx512vl")]
48703 unsafe fn test_mm256_maskz_cvtsepi32_epi8() {
48704 let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48705 let r = _mm256_maskz_cvtsepi32_epi8(0, a);
48706 assert_eq_m128i(r, _mm_setzero_si128());
48707 let r = _mm256_maskz_cvtsepi32_epi8(0b11111111, a);
48708 #[rustfmt::skip]
48709 let e = _mm_set_epi8(
48710 0, 0, 0, 0,
48711 0, 0, 0, 0,
48712 9, 10, 11, 12,
48713 13, 14, 15, 16,
48714 );
48715 assert_eq_m128i(r, e);
48716 }
48717
48718 #[simd_test(enable = "avx512f,avx512vl")]
48719 unsafe fn test_mm_cvtsepi32_epi8() {
48720 let a = _mm_set_epi32(13, 14, 15, 16);
48721 let r = _mm_cvtsepi32_epi8(a);
48722 #[rustfmt::skip]
48723 let e = _mm_set_epi8(
48724 0, 0, 0, 0,
48725 0, 0, 0, 0,
48726 0, 0, 0, 0,
48727 13, 14, 15, 16,
48728 );
48729 assert_eq_m128i(r, e);
48730 }
48731
48732 #[simd_test(enable = "avx512f,avx512vl")]
48733 unsafe fn test_mm_mask_cvtsepi32_epi8() {
48734 let a = _mm_set_epi32(13, 14, 15, 16);
48735 let src = _mm_set1_epi8(0);
48736 let r = _mm_mask_cvtsepi32_epi8(src, 0, a);
48737 assert_eq_m128i(r, src);
48738 let r = _mm_mask_cvtsepi32_epi8(src, 0b00001111, a);
48739 #[rustfmt::skip]
48740 let e = _mm_set_epi8(
48741 0, 0, 0, 0,
48742 0, 0, 0, 0,
48743 0, 0, 0, 0,
48744 13, 14, 15, 16,
48745 );
48746 assert_eq_m128i(r, e);
48747 }
48748
48749 #[simd_test(enable = "avx512f,avx512vl")]
48750 unsafe fn test_mm_maskz_cvtsepi32_epi8() {
48751 let a = _mm_set_epi32(13, 14, 15, 16);
48752 let r = _mm_maskz_cvtsepi32_epi8(0, a);
48753 assert_eq_m128i(r, _mm_setzero_si128());
48754 let r = _mm_maskz_cvtsepi32_epi8(0b00001111, a);
48755 #[rustfmt::skip]
48756 let e = _mm_set_epi8(
48757 0, 0, 0, 0,
48758 0, 0, 0, 0,
48759 0, 0, 0, 0,
48760 13, 14, 15, 16,
48761 );
48762 assert_eq_m128i(r, e);
48763 }
48764
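    // cvtusepi32_epi16 narrows with *unsigned* saturation: the source lanes are
    // treated as u32, so i32::MIN (0x8000_0000) saturates to u16::MAX, which
    // prints as -1 in the signed i16 view used by the expected vectors.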
48765 #[simd_test(enable = "avx512f")]
48766 unsafe fn test_mm512_cvtusepi32_epi16() {
48767 #[rustfmt::skip]
48768 let a = _mm512_set_epi32(
48769 0, 1, 2, 3,
48770 4, 5, 6, 7,
48771 8, 9, 10, 11,
48772 12, 13, i32::MIN, i32::MIN,
48773 );
48774 let r = _mm512_cvtusepi32_epi16(a);
48775 let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
48776 assert_eq_m256i(r, e);
48777 }
48778
48779 #[simd_test(enable = "avx512f")]
48780 unsafe fn test_mm512_mask_cvtusepi32_epi16() {
48781 #[rustfmt::skip]
48782 let a = _mm512_set_epi32(
48783 0, 1, 2, 3,
48784 4, 5, 6, 7,
48785 8, 9, 10, 11,
48786 12, 13, i32::MIN, i32::MIN,
48787 );
48788 let src = _mm256_set1_epi16(-1);
48789 let r = _mm512_mask_cvtusepi32_epi16(src, 0, a);
48790 assert_eq_m256i(r, src);
48791 let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
48792 let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
48793 assert_eq_m256i(r, e);
48794 }
48795
48796 #[simd_test(enable = "avx512f")]
48797 unsafe fn test_mm512_maskz_cvtusepi32_epi16() {
48798 #[rustfmt::skip]
48799 let a = _mm512_set_epi32(
48800 0, 1, 2, 3,
48801 4, 5, 6, 7,
48802 8, 9, 10, 11,
48803 12, 13, i32::MIN, i32::MIN,
48804 );
48805 let r = _mm512_maskz_cvtusepi32_epi16(0, a);
48806 assert_eq_m256i(r, _mm256_setzero_si256());
48807 let r = _mm512_maskz_cvtusepi32_epi16(0b00000000_11111111, a);
48808 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
48809 assert_eq_m256i(r, e);
48810 }
48811
48812 #[simd_test(enable = "avx512f,avx512vl")]
48813 unsafe fn test_mm256_cvtusepi32_epi16() {
48814 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48815 let r = _mm256_cvtusepi32_epi16(a);
48816 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48817 assert_eq_m128i(r, e);
48818 }
48819
48820 #[simd_test(enable = "avx512f,avx512vl")]
48821 unsafe fn test_mm256_mask_cvtusepi32_epi16() {
48822 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48823 let src = _mm_set1_epi16(0);
48824 let r = _mm256_mask_cvtusepi32_epi16(src, 0, a);
48825 assert_eq_m128i(r, src);
48826 let r = _mm256_mask_cvtusepi32_epi16(src, 0b11111111, a);
48827 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48828 assert_eq_m128i(r, e);
48829 }
48830
48831 #[simd_test(enable = "avx512f,avx512vl")]
48832 unsafe fn test_mm256_maskz_cvtusepi32_epi16() {
48833 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48834 let r = _mm256_maskz_cvtusepi32_epi16(0, a);
48835 assert_eq_m128i(r, _mm_setzero_si128());
48836 let r = _mm256_maskz_cvtusepi32_epi16(0b11111111, a);
48837 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48838 assert_eq_m128i(r, e);
48839 }
48840
48841 #[simd_test(enable = "avx512f,avx512vl")]
48842 unsafe fn test_mm_cvtusepi32_epi16() {
48843 let a = _mm_set_epi32(5, 6, 7, 8);
48844 let r = _mm_cvtusepi32_epi16(a);
48845 let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48846 assert_eq_m128i(r, e);
48847 }
48848
48849 #[simd_test(enable = "avx512f,avx512vl")]
48850 unsafe fn test_mm_mask_cvtusepi32_epi16() {
48851 let a = _mm_set_epi32(5, 6, 7, 8);
48852 let src = _mm_set1_epi16(0);
48853 let r = _mm_mask_cvtusepi32_epi16(src, 0, a);
48854 assert_eq_m128i(r, src);
48855 let r = _mm_mask_cvtusepi32_epi16(src, 0b00001111, a);
48856 let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48857 assert_eq_m128i(r, e);
48858 }
48859
48860 #[simd_test(enable = "avx512f,avx512vl")]
48861 unsafe fn test_mm_maskz_cvtusepi32_epi16() {
48862 let a = _mm_set_epi32(5, 6, 7, 8);
48863 let r = _mm_maskz_cvtusepi32_epi16(0, a);
48864 assert_eq_m128i(r, _mm_setzero_si128());
48865 let r = _mm_maskz_cvtusepi32_epi16(0b00001111, a);
48866 let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48867 assert_eq_m128i(r, e);
48868 }
48869
48870 #[simd_test(enable = "avx512f")]
48871 unsafe fn test_mm512_cvtusepi32_epi8() {
48872 #[rustfmt::skip]
48873 let a = _mm512_set_epi32(
48874 0, 1, 2, 3,
48875 4, 5, 6, 7,
48876 8, 9, 10, 11,
48877 12, 13, i32::MIN, i32::MIN,
48878 );
48879 let r = _mm512_cvtusepi32_epi8(a);
48880 let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
48881 assert_eq_m128i(r, e);
48882 }
48883
48884 #[simd_test(enable = "avx512f")]
48885 unsafe fn test_mm512_mask_cvtusepi32_epi8() {
48886 #[rustfmt::skip]
48887 let a = _mm512_set_epi32(
48888 0, 1, 2, 3,
48889 4, 5, 6, 7,
48890 8, 9, 10, 11,
48891 12, 13, i32::MIN, i32::MIN,
48892 );
48893 let src = _mm_set1_epi8(-1);
48894 let r = _mm512_mask_cvtusepi32_epi8(src, 0, a);
48895 assert_eq_m128i(r, src);
48896 let r = _mm512_mask_cvtusepi32_epi8(src, 0b00000000_11111111, a);
48897 let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
48898 assert_eq_m128i(r, e);
48899 }
48900
48901 #[simd_test(enable = "avx512f")]
48902 unsafe fn test_mm512_maskz_cvtusepi32_epi8() {
48903 #[rustfmt::skip]
48904 let a = _mm512_set_epi32(
48905 0, 1, 2, 3,
48906 4, 5, 6, 7,
48907 8, 9, 10, 11,
48908 12, 13, i32::MIN, i32::MIN,
48909 );
48910 let r = _mm512_maskz_cvtusepi32_epi8(0, a);
48911 assert_eq_m128i(r, _mm_setzero_si128());
48912 let r = _mm512_maskz_cvtusepi32_epi8(0b00000000_11111111, a);
48913 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
48914 assert_eq_m128i(r, e);
48915 }
48916
48917 #[simd_test(enable = "avx512f,avx512vl")]
48918 unsafe fn test_mm256_cvtusepi32_epi8() {
48919 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48920 let r = _mm256_cvtusepi32_epi8(a);
48921 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48922 assert_eq_m128i(r, e);
48923 }
48924
48925 #[simd_test(enable = "avx512f,avx512vl")]
48926 unsafe fn test_mm256_mask_cvtusepi32_epi8() {
48927 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48928 let src = _mm_set1_epi8(0);
48929 let r = _mm256_mask_cvtusepi32_epi8(src, 0, a);
48930 assert_eq_m128i(r, src);
48931 let r = _mm256_mask_cvtusepi32_epi8(src, 0b11111111, a);
48932 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48933 assert_eq_m128i(r, e);
48934 }
48935
48936 #[simd_test(enable = "avx512f,avx512vl")]
48937 unsafe fn test_mm256_maskz_cvtusepi32_epi8() {
48938 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48939 let r = _mm256_maskz_cvtusepi32_epi8(0, a);
48940 assert_eq_m128i(r, _mm_setzero_si128());
48941 let r = _mm256_maskz_cvtusepi32_epi8(0b11111111, a);
48942 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48943 assert_eq_m128i(r, e);
48944 }
48945
48946 #[simd_test(enable = "avx512f,avx512vl")]
48947 unsafe fn test_mm_cvtusepi32_epi8() {
48948 let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48949 let r = _mm_cvtusepi32_epi8(a);
48950 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48951 assert_eq_m128i(r, e);
48952 }
48953
48954 #[simd_test(enable = "avx512f,avx512vl")]
48955 unsafe fn test_mm_mask_cvtusepi32_epi8() {
48956 let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48957 let src = _mm_set1_epi8(0);
48958 let r = _mm_mask_cvtusepi32_epi8(src, 0, a);
48959 assert_eq_m128i(r, src);
48960 let r = _mm_mask_cvtusepi32_epi8(src, 0b00001111, a);
48961 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48962 assert_eq_m128i(r, e);
48963 }
48964
48965 #[simd_test(enable = "avx512f,avx512vl")]
48966 unsafe fn test_mm_maskz_cvtusepi32_epi8() {
48967 let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48968 let r = _mm_maskz_cvtusepi32_epi8(0, a);
48969 assert_eq_m128i(r, _mm_setzero_si128());
48970 let r = _mm_maskz_cvtusepi32_epi8(0b00001111, a);
48971 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48972 assert_eq_m128i(r, e);
48973 }
48974
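    // _MM_FROUND_TO_NEAREST_INT rounds halfway cases to even (-1.5 -> -2,
    // 9.5 -> 10), while _MM_FROUND_TO_NEG_INF rounds toward negative infinity
    // (9.5 -> 9); the pairs of expected vectors below reflect those two modes.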
48975 #[simd_test(enable = "avx512f")]
48976 unsafe fn test_mm512_cvt_roundps_epi32() {
48977 let a = _mm512_setr_ps(
48978 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48979 );
48980 let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
48981 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
48982 assert_eq_m512i(r, e);
48983 let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
48984 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15);
48985 assert_eq_m512i(r, e);
48986 }
48987
48988 #[simd_test(enable = "avx512f")]
48989 unsafe fn test_mm512_mask_cvt_roundps_epi32() {
48990 let a = _mm512_setr_ps(
48991 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48992 );
48993 let src = _mm512_set1_epi32(0);
48994 let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48995 src, 0, a,
48996 );
48997 assert_eq_m512i(r, src);
48998 let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48999 src,
49000 0b00000000_11111111,
49001 a,
49002 );
49003 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
49004 assert_eq_m512i(r, e);
49005 }
49006
49007 #[simd_test(enable = "avx512f")]
49008 unsafe fn test_mm512_maskz_cvt_roundps_epi32() {
49009 let a = _mm512_setr_ps(
49010 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49011 );
49012 let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49013 0, a,
49014 );
49015 assert_eq_m512i(r, _mm512_setzero_si512());
49016 let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49017 0b00000000_11111111,
49018 a,
49019 );
49020 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
49021 assert_eq_m512i(r, e);
49022 }
49023
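    // For the unsigned conversions, negative inputs are out of range: the hardware
    // returns the unsigned integer indefinite value 0xFFFF_FFFF, which shows up as
    // -1 when the lanes are compared through the signed helpers.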
49024 #[simd_test(enable = "avx512f")]
49025 unsafe fn test_mm512_cvt_roundps_epu32() {
49026 let a = _mm512_setr_ps(
49027 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49028 );
49029 let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
49030 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
49031 assert_eq_m512i(r, e);
49032 let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
49033 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49034 assert_eq_m512i(r, e);
49035 }
49036
49037 #[simd_test(enable = "avx512f")]
49038 unsafe fn test_mm512_mask_cvt_roundps_epu32() {
49039 let a = _mm512_setr_ps(
49040 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49041 );
49042 let src = _mm512_set1_epi32(0);
49043 let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49044 src, 0, a,
49045 );
49046 assert_eq_m512i(r, src);
49047 let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49048 src,
49049 0b00000000_11111111,
49050 a,
49051 );
49052 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49053 assert_eq_m512i(r, e);
49054 }
49055
49056 #[simd_test(enable = "avx512f")]
49057 unsafe fn test_mm512_maskz_cvt_roundps_epu32() {
49058 let a = _mm512_setr_ps(
49059 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49060 );
49061 let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49062 0, a,
49063 );
49064 assert_eq_m512i(r, _mm512_setzero_si512());
49065 let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49066 0b00000000_11111111,
49067 a,
49068 );
49069 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49070 assert_eq_m512i(r, e);
49071 }
49072
49073 #[simd_test(enable = "avx512f")]
49074 unsafe fn test_mm512_cvt_roundepi32_ps() {
49075 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49076 let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
49077 let e = _mm512_setr_ps(
49078 0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16.,
49079 );
49080 assert_eq_m512(r, e);
49081 }
49082
49083 #[simd_test(enable = "avx512f")]
49084 unsafe fn test_mm512_mask_cvt_roundepi32_ps() {
49085 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49086 let src = _mm512_set1_ps(0.);
49087 let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49088 src, 0, a,
49089 );
49090 assert_eq_m512(r, src);
49091 let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49092 src,
49093 0b00000000_11111111,
49094 a,
49095 );
49096 let e = _mm512_setr_ps(
49097 0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
49098 );
49099 assert_eq_m512(r, e);
49100 }
49101
49102 #[simd_test(enable = "avx512f")]
49103 unsafe fn test_mm512_maskz_cvt_roundepi32_ps() {
49104 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49105 let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49106 0, a,
49107 );
49108 assert_eq_m512(r, _mm512_setzero_ps());
49109 let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49110 0b00000000_11111111,
49111 a,
49112 );
49113 let e = _mm512_setr_ps(
49114 0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
49115 );
49116 assert_eq_m512(r, e);
49117 }
49118
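    // Here the i32 lanes are reinterpreted as u32 before conversion, so -2 becomes
    // 4294967294, which rounds to 4294967296.0 in f32 (written as 4294967300.
    // below; both literals denote the same f32 value).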
49119 #[simd_test(enable = "avx512f")]
49120 unsafe fn test_mm512_cvt_roundepu32_ps() {
49121 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49122 let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
49123 #[rustfmt::skip]
49124 let e = _mm512_setr_ps(
49125 0., 4294967300., 2., 4294967300.,
49126 4., 4294967300., 6., 4294967300.,
49127 8., 10., 10., 12.,
49128 12., 14., 14., 16.,
49129 );
49130 assert_eq_m512(r, e);
49131 }
49132
49133 #[simd_test(enable = "avx512f")]
49134 unsafe fn test_mm512_mask_cvt_roundepu32_ps() {
49135 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49136 let src = _mm512_set1_ps(0.);
49137 let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49138 src, 0, a,
49139 );
49140 assert_eq_m512(r, src);
49141 let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49142 src,
49143 0b00000000_11111111,
49144 a,
49145 );
49146 #[rustfmt::skip]
49147 let e = _mm512_setr_ps(
49148 0., 4294967300., 2., 4294967300.,
49149 4., 4294967300., 6., 4294967300.,
49150 0., 0., 0., 0.,
49151 0., 0., 0., 0.,
49152 );
49153 assert_eq_m512(r, e);
49154 }
49155
49156 #[simd_test(enable = "avx512f")]
49157 unsafe fn test_mm512_maskz_cvt_roundepu32_ps() {
49158 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49159 let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49160 0, a,
49161 );
49162 assert_eq_m512(r, _mm512_setzero_ps());
49163 let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49164 0b00000000_11111111,
49165 a,
49166 );
49167 #[rustfmt::skip]
49168 let e = _mm512_setr_ps(
49169 0., 4294967300., 2., 4294967300.,
49170 4., 4294967300., 6., 4294967300.,
49171 0., 0., 0., 0.,
49172 0., 0., 0., 0.,
49173 );
49174 assert_eq_m512(r, e);
49175 }
49176
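    // 4323521613979991040 == 0x3C00_3C00_3C00_3C00: each 64-bit lane holds four
    // IEEE 754 half-precision encodings of 1.0 (0x3C00), the expected result of
    // converting 1.0f32 to f16.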
49177 #[simd_test(enable = "avx512f")]
49178 unsafe fn test_mm512_cvt_roundps_ph() {
49179 let a = _mm512_set1_ps(1.);
49180 let r = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a);
49181 let e = _mm256_setr_epi64x(
49182 4323521613979991040,
49183 4323521613979991040,
49184 4323521613979991040,
49185 4323521613979991040,
49186 );
49187 assert_eq_m256i(r, e);
49188 }
49189
49190 #[simd_test(enable = "avx512f")]
49191 unsafe fn test_mm512_mask_cvt_roundps_ph() {
49192 let a = _mm512_set1_ps(1.);
49193 let src = _mm256_set1_epi16(0);
49194 let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49195 assert_eq_m256i(r, src);
49196 let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49197 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49198 assert_eq_m256i(r, e);
49199 }
49200
49201 #[simd_test(enable = "avx512f")]
49202 unsafe fn test_mm512_maskz_cvt_roundps_ph() {
49203 let a = _mm512_set1_ps(1.);
49204 let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49205 assert_eq_m256i(r, _mm256_setzero_si256());
49206 let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49207 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49208 assert_eq_m256i(r, e);
49209 }
49210
49211 #[simd_test(enable = "avx512f,avx512vl")]
49212 unsafe fn test_mm256_mask_cvt_roundps_ph() {
49213 let a = _mm256_set1_ps(1.);
49214 let src = _mm_set1_epi16(0);
49215 let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49216 assert_eq_m128i(r, src);
49217 let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
49218 let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49219 assert_eq_m128i(r, e);
49220 }
49221
49222 #[simd_test(enable = "avx512f,avx512vl")]
49223 unsafe fn test_mm256_maskz_cvt_roundps_ph() {
49224 let a = _mm256_set1_ps(1.);
49225 let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49226 assert_eq_m128i(r, _mm_setzero_si128());
49227 let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
49228 let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49229 assert_eq_m128i(r, e);
49230 }
49231
49232 #[simd_test(enable = "avx512f,avx512vl")]
49233 unsafe fn test_mm_mask_cvt_roundps_ph() {
49234 let a = _mm_set1_ps(1.);
49235 let src = _mm_set1_epi16(0);
49236 let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49237 assert_eq_m128i(r, src);
49238 let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
49239 let e = _mm_setr_epi64x(4323521613979991040, 0);
49240 assert_eq_m128i(r, e);
49241 }
49242
49243 #[simd_test(enable = "avx512f,avx512vl")]
49244 unsafe fn test_mm_maskz_cvt_roundps_ph() {
49245 let a = _mm_set1_ps(1.);
49246 let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49247 assert_eq_m128i(r, _mm_setzero_si128());
49248 let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
49249 let e = _mm_setr_epi64x(4323521613979991040, 0);
49250 assert_eq_m128i(r, e);
49251 }
49252
49253 #[simd_test(enable = "avx512f")]
49254 unsafe fn test_mm512_cvtps_ph() {
49255 let a = _mm512_set1_ps(1.);
49256 let r = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a);
49257 let e = _mm256_setr_epi64x(
49258 4323521613979991040,
49259 4323521613979991040,
49260 4323521613979991040,
49261 4323521613979991040,
49262 );
49263 assert_eq_m256i(r, e);
49264 }
49265
49266 #[simd_test(enable = "avx512f")]
49267 unsafe fn test_mm512_mask_cvtps_ph() {
49268 let a = _mm512_set1_ps(1.);
49269 let src = _mm256_set1_epi16(0);
49270 let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49271 assert_eq_m256i(r, src);
49272 let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49273 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49274 assert_eq_m256i(r, e);
49275 }
49276
49277 #[simd_test(enable = "avx512f")]
49278 unsafe fn test_mm512_maskz_cvtps_ph() {
49279 let a = _mm512_set1_ps(1.);
49280 let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49281 assert_eq_m256i(r, _mm256_setzero_si256());
49282 let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49283 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49284 assert_eq_m256i(r, e);
49285 }
49286
49287 #[simd_test(enable = "avx512f,avx512vl")]
49288 unsafe fn test_mm256_mask_cvtps_ph() {
49289 let a = _mm256_set1_ps(1.);
49290 let src = _mm_set1_epi16(0);
49291 let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49292 assert_eq_m128i(r, src);
49293 let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
49294 let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49295 assert_eq_m128i(r, e);
49296 }
49297
49298 #[simd_test(enable = "avx512f,avx512vl")]
49299 unsafe fn test_mm256_maskz_cvtps_ph() {
49300 let a = _mm256_set1_ps(1.);
49301 let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49302 assert_eq_m128i(r, _mm_setzero_si128());
49303 let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
49304 let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49305 assert_eq_m128i(r, e);
49306 }
49307
49308 #[simd_test(enable = "avx512f,avx512vl")]
49309 unsafe fn test_mm_mask_cvtps_ph() {
49310 let a = _mm_set1_ps(1.);
49311 let src = _mm_set1_epi16(0);
49312 let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49313 assert_eq_m128i(r, src);
49314 let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
49315 let e = _mm_setr_epi64x(4323521613979991040, 0);
49316 assert_eq_m128i(r, e);
49317 }
49318
49319 #[simd_test(enable = "avx512f,avx512vl")]
49320 unsafe fn test_mm_maskz_cvtps_ph() {
49321 let a = _mm_set1_ps(1.);
49322 let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49323 assert_eq_m128i(r, _mm_setzero_si128());
49324 let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
49325 let e = _mm_setr_epi64x(4323521613979991040, 0);
49326 assert_eq_m128i(r, e);
49327 }
49328
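    // The reverse direction: the packed 0x3C00 half-precision words from the tests
    // above decode back to 1.0f32 in every lane.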
49329 #[simd_test(enable = "avx512f")]
49330 unsafe fn test_mm512_cvt_roundph_ps() {
49331 let a = _mm256_setr_epi64x(
49332 4323521613979991040,
49333 4323521613979991040,
49334 4323521613979991040,
49335 4323521613979991040,
49336 );
49337 let r = _mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(a);
49338 let e = _mm512_set1_ps(1.);
49339 assert_eq_m512(r, e);
49340 }
49341
49342 #[simd_test(enable = "avx512f")]
49343 unsafe fn test_mm512_mask_cvt_roundph_ps() {
49344 let a = _mm256_setr_epi64x(
49345 4323521613979991040,
49346 4323521613979991040,
49347 4323521613979991040,
49348 4323521613979991040,
49349 );
49350 let src = _mm512_set1_ps(0.);
49351 let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0, a);
49352 assert_eq_m512(r, src);
49353 let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49354 let e = _mm512_setr_ps(
49355 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49356 );
49357 assert_eq_m512(r, e);
49358 }
49359
49360 #[simd_test(enable = "avx512f")]
49361 unsafe fn test_mm512_maskz_cvt_roundph_ps() {
49362 let a = _mm256_setr_epi64x(
49363 4323521613979991040,
49364 4323521613979991040,
49365 4323521613979991040,
49366 4323521613979991040,
49367 );
49368 let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0, a);
49369 assert_eq_m512(r, _mm512_setzero_ps());
49370 let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49371 let e = _mm512_setr_ps(
49372 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49373 );
49374 assert_eq_m512(r, e);
49375 }
49376
49377 #[simd_test(enable = "avx512f")]
49378 unsafe fn test_mm512_cvtph_ps() {
49379 let a = _mm256_setr_epi64x(
49380 4323521613979991040,
49381 4323521613979991040,
49382 4323521613979991040,
49383 4323521613979991040,
49384 );
49385 let r = _mm512_cvtph_ps(a);
49386 let e = _mm512_set1_ps(1.);
49387 assert_eq_m512(r, e);
49388 }
49389
49390 #[simd_test(enable = "avx512f")]
49391 unsafe fn test_mm512_mask_cvtph_ps() {
49392 let a = _mm256_setr_epi64x(
49393 4323521613979991040,
49394 4323521613979991040,
49395 4323521613979991040,
49396 4323521613979991040,
49397 );
49398 let src = _mm512_set1_ps(0.);
49399 let r = _mm512_mask_cvtph_ps(src, 0, a);
49400 assert_eq_m512(r, src);
49401 let r = _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a);
49402 let e = _mm512_setr_ps(
49403 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49404 );
49405 assert_eq_m512(r, e);
49406 }
49407
49408 #[simd_test(enable = "avx512f")]
49409 unsafe fn test_mm512_maskz_cvtph_ps() {
49410 let a = _mm256_setr_epi64x(
49411 4323521613979991040,
49412 4323521613979991040,
49413 4323521613979991040,
49414 4323521613979991040,
49415 );
49416 let r = _mm512_maskz_cvtph_ps(0, a);
49417 assert_eq_m512(r, _mm512_setzero_ps());
49418 let r = _mm512_maskz_cvtph_ps(0b00000000_11111111, a);
49419 let e = _mm512_setr_ps(
49420 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49421 );
49422 assert_eq_m512(r, e);
49423 }
49424
49425 #[simd_test(enable = "avx512f,avx512vl")]
49426 unsafe fn test_mm256_mask_cvtph_ps() {
49427 let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49428 let src = _mm256_set1_ps(0.);
49429 let r = _mm256_mask_cvtph_ps(src, 0, a);
49430 assert_eq_m256(r, src);
49431 let r = _mm256_mask_cvtph_ps(src, 0b11111111, a);
49432 let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
49433 assert_eq_m256(r, e);
49434 }
49435
49436 #[simd_test(enable = "avx512f,avx512vl")]
49437 unsafe fn test_mm256_maskz_cvtph_ps() {
49438 let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49439 let r = _mm256_maskz_cvtph_ps(0, a);
49440 assert_eq_m256(r, _mm256_setzero_ps());
49441 let r = _mm256_maskz_cvtph_ps(0b11111111, a);
49442 let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
49443 assert_eq_m256(r, e);
49444 }
49445
49446 #[simd_test(enable = "avx512f,avx512vl")]
49447 unsafe fn test_mm_mask_cvtph_ps() {
49448 let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49449 let src = _mm_set1_ps(0.);
49450 let r = _mm_mask_cvtph_ps(src, 0, a);
49451 assert_eq_m128(r, src);
49452 let r = _mm_mask_cvtph_ps(src, 0b00001111, a);
49453 let e = _mm_setr_ps(1., 1., 1., 1.);
49454 assert_eq_m128(r, e);
49455 }
49456
49457 #[simd_test(enable = "avx512f,avx512vl")]
49458 unsafe fn test_mm_maskz_cvtph_ps() {
49459 let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49460 let r = _mm_maskz_cvtph_ps(0, a);
49461 assert_eq_m128(r, _mm_setzero_ps());
49462 let r = _mm_maskz_cvtph_ps(0b00001111, a);
49463 let e = _mm_setr_ps(1., 1., 1., 1.);
49464 assert_eq_m128(r, e);
49465 }
49466
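    // The cvtt* variants truncate toward zero regardless of the rounding mode (the
    // immediate here only suppresses exceptions), so -1.5 -> -1 and 9.5 -> 9 below.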
49467 #[simd_test(enable = "avx512f")]
49468 unsafe fn test_mm512_cvtt_roundps_epi32() {
49469 let a = _mm512_setr_ps(
49470 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49471 );
49472 let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a);
49473 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
49474 assert_eq_m512i(r, e);
49475 }
49476
49477 #[simd_test(enable = "avx512f")]
49478 unsafe fn test_mm512_mask_cvtt_roundps_epi32() {
49479 let a = _mm512_setr_ps(
49480 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49481 );
49482 let src = _mm512_set1_epi32(0);
49483 let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0, a);
49484 assert_eq_m512i(r, src);
49485 let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49486 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49487 assert_eq_m512i(r, e);
49488 }
49489
49490 #[simd_test(enable = "avx512f")]
49491 unsafe fn test_mm512_maskz_cvtt_roundps_epi32() {
49492 let a = _mm512_setr_ps(
49493 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49494 );
49495 let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0, a);
49496 assert_eq_m512i(r, _mm512_setzero_si512());
49497 let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49498 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49499 assert_eq_m512i(r, e);
49500 }
49501
49502 #[simd_test(enable = "avx512f")]
49503 unsafe fn test_mm512_cvtt_roundps_epu32() {
49504 let a = _mm512_setr_ps(
49505 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49506 );
49507 let r = _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a);
49508 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49509 assert_eq_m512i(r, e);
49510 }
49511
49512 #[simd_test(enable = "avx512f")]
49513 unsafe fn test_mm512_mask_cvtt_roundps_epu32() {
49514 let a = _mm512_setr_ps(
49515 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49516 );
49517 let src = _mm512_set1_epi32(0);
49518 let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0, a);
49519 assert_eq_m512i(r, src);
49520 let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49521 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49522 assert_eq_m512i(r, e);
49523 }
49524
49525 #[simd_test(enable = "avx512f")]
49526 unsafe fn test_mm512_maskz_cvtt_roundps_epu32() {
49527 let a = _mm512_setr_ps(
49528 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49529 );
49530 let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0, a);
49531 assert_eq_m512i(r, _mm512_setzero_si512());
49532 let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49533 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49534 assert_eq_m512i(r, e);
49535 }
49536
49537 #[simd_test(enable = "avx512f")]
49538 unsafe fn test_mm512_cvttps_epi32() {
49539 let a = _mm512_setr_ps(
49540 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49541 );
49542 let r = _mm512_cvttps_epi32(a);
49543 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
49544 assert_eq_m512i(r, e);
49545 }
49546
49547 #[simd_test(enable = "avx512f")]
49548 unsafe fn test_mm512_mask_cvttps_epi32() {
49549 let a = _mm512_setr_ps(
49550 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49551 );
49552 let src = _mm512_set1_epi32(0);
49553 let r = _mm512_mask_cvttps_epi32(src, 0, a);
49554 assert_eq_m512i(r, src);
49555 let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
49556 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49557 assert_eq_m512i(r, e);
49558 }
49559
49560 #[simd_test(enable = "avx512f")]
49561 unsafe fn test_mm512_maskz_cvttps_epi32() {
49562 let a = _mm512_setr_ps(
49563 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49564 );
49565 let r = _mm512_maskz_cvttps_epi32(0, a);
49566 assert_eq_m512i(r, _mm512_setzero_si512());
49567 let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a);
49568 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49569 assert_eq_m512i(r, e);
49570 }
49571
49572 #[simd_test(enable = "avx512f,avx512vl")]
49573 unsafe fn test_mm256_mask_cvttps_epi32() {
49574 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49575 let src = _mm256_set1_epi32(0);
49576 let r = _mm256_mask_cvttps_epi32(src, 0, a);
49577 assert_eq_m256i(r, src);
49578 let r = _mm256_mask_cvttps_epi32(src, 0b11111111, a);
49579 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49580 assert_eq_m256i(r, e);
49581 }
49582
49583 #[simd_test(enable = "avx512f,avx512vl")]
49584 unsafe fn test_mm256_maskz_cvttps_epi32() {
49585 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49586 let r = _mm256_maskz_cvttps_epi32(0, a);
49587 assert_eq_m256i(r, _mm256_setzero_si256());
49588 let r = _mm256_maskz_cvttps_epi32(0b11111111, a);
49589 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49590 assert_eq_m256i(r, e);
49591 }
49592
49593 #[simd_test(enable = "avx512f,avx512vl")]
49594 unsafe fn test_mm_mask_cvttps_epi32() {
49595 let a = _mm_set_ps(12., 13.5, 14., 15.5);
49596 let src = _mm_set1_epi32(0);
49597 let r = _mm_mask_cvttps_epi32(src, 0, a);
49598 assert_eq_m128i(r, src);
49599 let r = _mm_mask_cvttps_epi32(src, 0b00001111, a);
49600 let e = _mm_set_epi32(12, 13, 14, 15);
49601 assert_eq_m128i(r, e);
49602 }
49603
49604 #[simd_test(enable = "avx512f,avx512vl")]
49605 unsafe fn test_mm_maskz_cvttps_epi32() {
49606 let a = _mm_set_ps(12., 13.5, 14., 15.5);
49607 let r = _mm_maskz_cvttps_epi32(0, a);
49608 assert_eq_m128i(r, _mm_setzero_si128());
49609 let r = _mm_maskz_cvttps_epi32(0b00001111, a);
49610 let e = _mm_set_epi32(12, 13, 14, 15);
49611 assert_eq_m128i(r, e);
49612 }
49613
49614 #[simd_test(enable = "avx512f")]
49615 unsafe fn test_mm512_cvttps_epu32() {
49616 let a = _mm512_setr_ps(
49617 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49618 );
49619 let r = _mm512_cvttps_epu32(a);
49620 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49621 assert_eq_m512i(r, e);
49622 }
49623
49624 #[simd_test(enable = "avx512f")]
49625 unsafe fn test_mm512_mask_cvttps_epu32() {
49626 let a = _mm512_setr_ps(
49627 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49628 );
49629 let src = _mm512_set1_epi32(0);
49630 let r = _mm512_mask_cvttps_epu32(src, 0, a);
49631 assert_eq_m512i(r, src);
49632 let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a);
49633 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49634 assert_eq_m512i(r, e);
49635 }
49636
49637 #[simd_test(enable = "avx512f")]
49638 unsafe fn test_mm512_maskz_cvttps_epu32() {
49639 let a = _mm512_setr_ps(
49640 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49641 );
49642 let r = _mm512_maskz_cvttps_epu32(0, a);
49643 assert_eq_m512i(r, _mm512_setzero_si512());
49644 let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a);
49645 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49646 assert_eq_m512i(r, e);
49647 }
49648
49649 #[simd_test(enable = "avx512f,avx512vl")]
49650 unsafe fn test_mm256_cvttps_epu32() {
49651 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49652 let r = _mm256_cvttps_epu32(a);
49653 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49654 assert_eq_m256i(r, e);
49655 }
49656
49657 #[simd_test(enable = "avx512f,avx512vl")]
49658 unsafe fn test_mm256_mask_cvttps_epu32() {
49659 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49660 let src = _mm256_set1_epi32(0);
49661 let r = _mm256_mask_cvttps_epu32(src, 0, a);
49662 assert_eq_m256i(r, src);
49663 let r = _mm256_mask_cvttps_epu32(src, 0b11111111, a);
49664 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49665 assert_eq_m256i(r, e);
49666 }
49667
49668 #[simd_test(enable = "avx512f,avx512vl")]
49669 unsafe fn test_mm256_maskz_cvttps_epu32() {
49670 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49671 let r = _mm256_maskz_cvttps_epu32(0, a);
49672 assert_eq_m256i(r, _mm256_setzero_si256());
49673 let r = _mm256_maskz_cvttps_epu32(0b11111111, a);
49674 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49675 assert_eq_m256i(r, e);
49676 }
49677
49678 #[simd_test(enable = "avx512f,avx512vl")]
49679 unsafe fn test_mm_cvttps_epu32() {
49680 let a = _mm_set_ps(12., 13.5, 14., 15.5);
49681 let r = _mm_cvttps_epu32(a);
49682 let e = _mm_set_epi32(12, 13, 14, 15);
49683 assert_eq_m128i(r, e);
49684 }
49685
49686 #[simd_test(enable = "avx512f,avx512vl")]
49687 unsafe fn test_mm_mask_cvttps_epu32() {
49688 let a = _mm_set_ps(12., 13.5, 14., 15.5);
49689 let src = _mm_set1_epi32(0);
49690 let r = _mm_mask_cvttps_epu32(src, 0, a);
49691 assert_eq_m128i(r, src);
49692 let r = _mm_mask_cvttps_epu32(src, 0b00001111, a);
49693 let e = _mm_set_epi32(12, 13, 14, 15);
49694 assert_eq_m128i(r, e);
49695 }
49696
49697 #[simd_test(enable = "avx512f,avx512vl")]
49698 unsafe fn test_mm_maskz_cvttps_epu32() {
49699 let a = _mm_set_ps(12., 13.5, 14., 15.5);
49700 let r = _mm_maskz_cvttps_epu32(0, a);
49701 assert_eq_m128i(r, _mm_setzero_si128());
49702 let r = _mm_maskz_cvttps_epu32(0b00001111, a);
49703 let e = _mm_set_epi32(12, 13, 14, 15);
49704 assert_eq_m128i(r, e);
49705 }
49706
49707 #[simd_test(enable = "avx512f")]
49708 unsafe fn test_mm512_i32gather_ps() {
49709 let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
        // A scale of 4 addresses 32-bit elements, so each index is an element offset
49711 #[rustfmt::skip]
49712 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49713 120, 128, 136, 144, 152, 160, 168, 176);
49714 let r = _mm512_i32gather_ps::<4>(index, arr.as_ptr());
49715 #[rustfmt::skip]
49716 assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
49717 120., 128., 136., 144., 152., 160., 168., 176.));
49718 }
49719
49720 #[simd_test(enable = "avx512f")]
49721 unsafe fn test_mm512_mask_i32gather_ps() {
49722 let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
49723 let src = _mm512_set1_ps(2.);
49724 let mask = 0b10101010_10101010;
49725 #[rustfmt::skip]
49726 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49727 120, 128, 136, 144, 152, 160, 168, 176);
        // A scale of 4 addresses 32-bit elements, so each index is an element offset
49729 let r = _mm512_mask_i32gather_ps::<4>(src, mask, index, arr.as_ptr());
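        // Even-numbered mask bits are clear, so those lanes keep the value from `src` (2.);
        // the odd lanes load arr[index].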
49730 #[rustfmt::skip]
49731 assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
49732 2., 128., 2., 144., 2., 160., 2., 176.));
49733 }
49734
49735 #[simd_test(enable = "avx512f")]
49736 unsafe fn test_mm512_i32gather_epi32() {
49737 let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
        // A scale of 4 addresses 32-bit elements, so each index is an element offset
49739 #[rustfmt::skip]
49740 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49741 120, 128, 136, 144, 152, 160, 168, 176);
49742 let r = _mm512_i32gather_epi32::<4>(index, arr.as_ptr());
49743 #[rustfmt::skip]
49744 assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49745 120, 128, 136, 144, 152, 160, 168, 176));
49746 }
49747
49748 #[simd_test(enable = "avx512f")]
49749 unsafe fn test_mm512_mask_i32gather_epi32() {
49750 let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
49751 let src = _mm512_set1_epi32(2);
49752 let mask = 0b10101010_10101010;
49753 let index = _mm512_setr_epi32(
49754 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
49755 );
        // A scale of 4 addresses 32-bit elements, so each index is an element offset
49757 let r = _mm512_mask_i32gather_epi32::<4>(src, mask, index, arr.as_ptr());
49758 assert_eq_m512i(
49759 r,
49760 _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, 2, 144, 2, 176, 2, 208, 2, 240),
49761 );
49762 }
49763
49764 #[simd_test(enable = "avx512f")]
49765 unsafe fn test_mm512_i32scatter_ps() {
49766 let mut arr = [0f32; 256];
49767 #[rustfmt::skip]
49768 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49769 128, 144, 160, 176, 192, 208, 224, 240);
49770 let src = _mm512_setr_ps(
49771 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
49772 );
        // A scale of 4 addresses 32-bit elements, so each index is an element offset
49774 _mm512_i32scatter_ps::<4>(arr.as_mut_ptr(), index, src);
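        // The indices step by 16 elements, so lane i (holding the value i + 1) is written to arr[i * 16].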
49775 let mut expected = [0f32; 256];
49776 for i in 0..16 {
49777 expected[i * 16] = (i + 1) as f32;
49778 }
49779 assert_eq!(&arr[..], &expected[..],);
49780 }
49781
49782 #[simd_test(enable = "avx512f")]
49783 unsafe fn test_mm512_mask_i32scatter_ps() {
49784 let mut arr = [0f32; 256];
49785 let mask = 0b10101010_10101010;
49786 #[rustfmt::skip]
49787 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49788 128, 144, 160, 176, 192, 208, 224, 240);
49789 let src = _mm512_setr_ps(
49790 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
49791 );
        // A scale of 4 addresses 32-bit elements, so each index is an element offset
49793 _mm512_mask_i32scatter_ps::<4>(arr.as_mut_ptr(), mask, index, src);
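        // Only the odd lanes have their mask bit set: lane 2i + 1 holds the value 2i + 2
        // and scatters it to arr[(2i + 1) * 16], i.e. arr[i * 32 + 16].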
49794 let mut expected = [0f32; 256];
49795 for i in 0..8 {
49796 expected[i * 32 + 16] = 2. * (i + 1) as f32;
49797 }
49798 assert_eq!(&arr[..], &expected[..],);
49799 }
49800
49801 #[simd_test(enable = "avx512f")]
49802 unsafe fn test_mm512_i32scatter_epi32() {
49803 let mut arr = [0i32; 256];
        #[rustfmt::skip]
        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                      128, 144, 160, 176, 192, 208, 224, 240);
49808 let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        // A scale of 4 addresses 32-bit elements, so each index is an element offset
49810 _mm512_i32scatter_epi32::<4>(arr.as_mut_ptr(), index, src);
49811 let mut expected = [0i32; 256];
49812 for i in 0..16 {
49813 expected[i * 16] = (i + 1) as i32;
49814 }
49815 assert_eq!(&arr[..], &expected[..],);
49816 }
49817
49818 #[simd_test(enable = "avx512f")]
49819 unsafe fn test_mm512_mask_i32scatter_epi32() {
49820 let mut arr = [0i32; 256];
49821 let mask = 0b10101010_10101010;
49822 #[rustfmt::skip]
49823 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49824 128, 144, 160, 176, 192, 208, 224, 240);
49825 let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        // A scale of 4 addresses 32-bit elements, so each index is an element offset
49827 _mm512_mask_i32scatter_epi32::<4>(arr.as_mut_ptr(), mask, index, src);
49828 let mut expected = [0i32; 256];
49829 for i in 0..8 {
49830 expected[i * 32 + 16] = 2 * (i + 1) as i32;
49831 }
49832 assert_eq!(&arr[..], &expected[..],);
49833 }
49834
49835 #[simd_test(enable = "avx512f")]
49836 unsafe fn test_mm512_cmplt_ps_mask() {
49837 #[rustfmt::skip]
49838 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49839 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49840 let b = _mm512_set1_ps(-1.);
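        // LT is an ordered comparison, so the NaN lanes yield 0; only f32::MIN and -100. are below -1.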
49841 let m = _mm512_cmplt_ps_mask(a, b);
49842 assert_eq!(m, 0b00000101_00000101);
49843 }
49844
49845 #[simd_test(enable = "avx512f")]
49846 unsafe fn test_mm512_mask_cmplt_ps_mask() {
49847 #[rustfmt::skip]
49848 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49849 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49850 let b = _mm512_set1_ps(-1.);
49851 let mask = 0b01100110_01100110;
49852 let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
49853 assert_eq!(r, 0b00000100_00000100);
49854 }
49855
49856 #[simd_test(enable = "avx512f")]
49857 unsafe fn test_mm512_cmpnlt_ps_mask() {
49858 #[rustfmt::skip]
49859 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49860 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49861 let b = _mm512_set1_ps(-1.);
49862 assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
49863 }
49864
49865 #[simd_test(enable = "avx512f")]
49866 unsafe fn test_mm512_mask_cmpnlt_ps_mask() {
49867 #[rustfmt::skip]
49868 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49869 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49870 let b = _mm512_set1_ps(-1.);
49871 let mask = 0b01111010_01111010;
49872 assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
49873 }
49874
49875 #[simd_test(enable = "avx512f")]
49876 unsafe fn test_mm512_cmpnle_ps_mask() {
49877 #[rustfmt::skip]
49878 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49879 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49880 let b = _mm512_set1_ps(-1.);
49881 let m = _mm512_cmpnle_ps_mask(b, a);
49882 assert_eq!(m, 0b00001101_00001101);
49883 }
49884
49885 #[simd_test(enable = "avx512f")]
49886 unsafe fn test_mm512_mask_cmpnle_ps_mask() {
49887 #[rustfmt::skip]
49888 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49889 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49890 let b = _mm512_set1_ps(-1.);
49891 let mask = 0b01100110_01100110;
49892 let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
49893 assert_eq!(r, 0b00000100_00000100);
49894 }
49895
49896 #[simd_test(enable = "avx512f")]
49897 unsafe fn test_mm512_cmple_ps_mask() {
49898 #[rustfmt::skip]
49899 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49900 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49901 let b = _mm512_set1_ps(-1.);
49902 assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
49903 }
49904
49905 #[simd_test(enable = "avx512f")]
49906 unsafe fn test_mm512_mask_cmple_ps_mask() {
49907 #[rustfmt::skip]
49908 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49909 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49910 let b = _mm512_set1_ps(-1.);
49911 let mask = 0b01111010_01111010;
49912 assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
49913 }
49914
49915 #[simd_test(enable = "avx512f")]
49916 unsafe fn test_mm512_cmpeq_ps_mask() {
49917 #[rustfmt::skip]
49918 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49919 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49920 #[rustfmt::skip]
49921 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49922 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49923 let m = _mm512_cmpeq_ps_mask(b, a);
49924 assert_eq!(m, 0b11001101_11001101);
49925 }
49926
49927 #[simd_test(enable = "avx512f")]
49928 unsafe fn test_mm512_mask_cmpeq_ps_mask() {
49929 #[rustfmt::skip]
49930 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49931 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49932 #[rustfmt::skip]
49933 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49934 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49935 let mask = 0b01111010_01111010;
49936 let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
49937 assert_eq!(r, 0b01001000_01001000);
49938 }
49939
49940 #[simd_test(enable = "avx512f")]
49941 unsafe fn test_mm512_cmpneq_ps_mask() {
49942 #[rustfmt::skip]
49943 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49944 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49945 #[rustfmt::skip]
49946 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49947 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49948 let m = _mm512_cmpneq_ps_mask(b, a);
49949 assert_eq!(m, 0b00110010_00110010);
49950 }
49951
49952 #[simd_test(enable = "avx512f")]
49953 unsafe fn test_mm512_mask_cmpneq_ps_mask() {
49954 #[rustfmt::skip]
49955 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49956 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49957 #[rustfmt::skip]
49958 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49959 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49960 let mask = 0b01111010_01111010;
49961 let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
49962 assert_eq!(r, 0b00110010_00110010)
49963 }
49964
49965 #[simd_test(enable = "avx512f")]
49966 unsafe fn test_mm512_cmp_ps_mask() {
49967 #[rustfmt::skip]
49968 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49969 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49970 let b = _mm512_set1_ps(-1.);
49971 let m = _mm512_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49972 assert_eq!(m, 0b00000101_00000101);
49973 }
49974
49975 #[simd_test(enable = "avx512f")]
49976 unsafe fn test_mm512_mask_cmp_ps_mask() {
49977 #[rustfmt::skip]
49978 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49979 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49980 let b = _mm512_set1_ps(-1.);
49981 let mask = 0b01100110_01100110;
49982 let r = _mm512_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49983 assert_eq!(r, 0b00000100_00000100);
49984 }
49985
49986 #[simd_test(enable = "avx512f,avx512vl")]
49987 unsafe fn test_mm256_cmp_ps_mask() {
49988 let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49989 let b = _mm256_set1_ps(-1.);
49990 let m = _mm256_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49991 assert_eq!(m, 0b00000101);
49992 }
49993
49994 #[simd_test(enable = "avx512f,avx512vl")]
49995 unsafe fn test_mm256_mask_cmp_ps_mask() {
49996 let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49997 let b = _mm256_set1_ps(-1.);
49998 let mask = 0b01100110;
49999 let r = _mm256_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
50000 assert_eq!(r, 0b00000100);
50001 }
50002
50003 #[simd_test(enable = "avx512f,avx512vl")]
50004 unsafe fn test_mm_cmp_ps_mask() {
50005 let a = _mm_set_ps(0., 1., -1., 13.);
50006 let b = _mm_set1_ps(1.);
50007 let m = _mm_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
50008 assert_eq!(m, 0b00001010);
50009 }
50010
50011 #[simd_test(enable = "avx512f,avx512vl")]
50012 unsafe fn test_mm_mask_cmp_ps_mask() {
50013 let a = _mm_set_ps(0., 1., -1., 13.);
50014 let b = _mm_set1_ps(1.);
50015 let mask = 0b11111111;
50016 let r = _mm_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
50017 assert_eq!(r, 0b00001010);
50018 }
50019
50020 #[simd_test(enable = "avx512f")]
50021 unsafe fn test_mm512_cmp_round_ps_mask() {
50022 #[rustfmt::skip]
50023 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
50024 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
50025 let b = _mm512_set1_ps(-1.);
50026 let m = _mm512_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(a, b);
50027 assert_eq!(m, 0b00000101_00000101);
50028 }
50029
50030 #[simd_test(enable = "avx512f")]
50031 unsafe fn test_mm512_mask_cmp_round_ps_mask() {
50032 #[rustfmt::skip]
50033 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
50034 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
50035 let b = _mm512_set1_ps(-1.);
50036 let mask = 0b01100110_01100110;
50037 let r = _mm512_mask_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(mask, a, b);
50038 assert_eq!(r, 0b00000100_00000100);
50039 }
50040
50041 #[simd_test(enable = "avx512f")]
50042 unsafe fn test_mm512_cmpord_ps_mask() {
50043 #[rustfmt::skip]
50044 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
50045 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
50046 #[rustfmt::skip]
50047 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
50048 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
50049 let m = _mm512_cmpord_ps_mask(a, b);
50050 assert_eq!(m, 0b00000101_00000101);
50051 }
50052
50053 #[simd_test(enable = "avx512f")]
50054 unsafe fn test_mm512_mask_cmpord_ps_mask() {
50055 #[rustfmt::skip]
50056 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
50057 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
50058 #[rustfmt::skip]
50059 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
50060 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
50061 let mask = 0b11000011_11000011;
50062 let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
50063 assert_eq!(m, 0b00000001_00000001);
50064 }
50065
50066 #[simd_test(enable = "avx512f")]
50067 unsafe fn test_mm512_cmpunord_ps_mask() {
50068 #[rustfmt::skip]
50069 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
50070 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
50071 #[rustfmt::skip]
50072 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
50073 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
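        // A lane compares as unordered whenever either operand is NaN; only bits 0 and 2 of
        // each half have two non-NaN operands and stay clear.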
50074 let m = _mm512_cmpunord_ps_mask(a, b);
50075
50076 assert_eq!(m, 0b11111010_11111010);
50077 }
50078
50079 #[simd_test(enable = "avx512f")]
50080 unsafe fn test_mm512_mask_cmpunord_ps_mask() {
50081 #[rustfmt::skip]
50082 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
50083 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
50084 #[rustfmt::skip]
50085 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
50086 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
50087 let mask = 0b00001111_00001111;
50088 let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
        assert_eq!(m, 0b00001010_00001010);
50090 }
50091
50092 #[simd_test(enable = "avx512f")]
50093 unsafe fn test_mm_cmp_ss_mask() {
50094 let a = _mm_setr_ps(2., 1., 1., 1.);
50095 let b = _mm_setr_ps(1., 2., 2., 2.);
50096 let m = _mm_cmp_ss_mask::<_CMP_GE_OS>(a, b);
50097 assert_eq!(m, 1);
50098 }
50099
50100 #[simd_test(enable = "avx512f")]
50101 unsafe fn test_mm_mask_cmp_ss_mask() {
50102 let a = _mm_setr_ps(2., 1., 1., 1.);
50103 let b = _mm_setr_ps(1., 2., 2., 2.);
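        // The compare is scalar, so only bit 0 of the mask is significant; with it clear the result is 0.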
50104 let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b10, a, b);
50105 assert_eq!(m, 0);
50106 let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b1, a, b);
50107 assert_eq!(m, 1);
50108 }
50109
50110 #[simd_test(enable = "avx512f")]
50111 unsafe fn test_mm_cmp_round_ss_mask() {
50112 let a = _mm_setr_ps(2., 1., 1., 1.);
50113 let b = _mm_setr_ps(1., 2., 2., 2.);
50114 let m = _mm_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
50115 assert_eq!(m, 1);
50116 }
50117
50118 #[simd_test(enable = "avx512f")]
50119 unsafe fn test_mm_mask_cmp_round_ss_mask() {
50120 let a = _mm_setr_ps(2., 1., 1., 1.);
50121 let b = _mm_setr_ps(1., 2., 2., 2.);
50122 let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
50123 assert_eq!(m, 0);
50124 let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
50125 assert_eq!(m, 1);
50126 }
50127
50128 #[simd_test(enable = "avx512f")]
50129 unsafe fn test_mm_cmp_sd_mask() {
50130 let a = _mm_setr_pd(2., 1.);
50131 let b = _mm_setr_pd(1., 2.);
50132 let m = _mm_cmp_sd_mask::<_CMP_GE_OS>(a, b);
50133 assert_eq!(m, 1);
50134 }
50135
50136 #[simd_test(enable = "avx512f")]
50137 unsafe fn test_mm_mask_cmp_sd_mask() {
50138 let a = _mm_setr_pd(2., 1.);
50139 let b = _mm_setr_pd(1., 2.);
50140 let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b10, a, b);
50141 assert_eq!(m, 0);
50142 let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b1, a, b);
50143 assert_eq!(m, 1);
50144 }
50145
50146 #[simd_test(enable = "avx512f")]
50147 unsafe fn test_mm_cmp_round_sd_mask() {
50148 let a = _mm_setr_pd(2., 1.);
50149 let b = _mm_setr_pd(1., 2.);
50150 let m = _mm_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
50151 assert_eq!(m, 1);
50152 }
50153
50154 #[simd_test(enable = "avx512f")]
50155 unsafe fn test_mm_mask_cmp_round_sd_mask() {
50156 let a = _mm_setr_pd(2., 1.);
50157 let b = _mm_setr_pd(1., 2.);
50158 let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
50159 assert_eq!(m, 0);
50160 let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
50161 assert_eq!(m, 1);
50162 }
50163
50164 #[simd_test(enable = "avx512f")]
50165 unsafe fn test_mm512_cmplt_epu32_mask() {
50166 #[rustfmt::skip]
50167 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50168 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50169 let b = _mm512_set1_epi32(-1);
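        // Compared as unsigned, b is u32::MAX, so every lane of `a` except the -1 and u32::MAX lanes is below it.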
50170 let m = _mm512_cmplt_epu32_mask(a, b);
50171 assert_eq!(m, 0b11001111_11001111);
50172 }
50173
50174 #[simd_test(enable = "avx512f")]
50175 unsafe fn test_mm512_mask_cmplt_epu32_mask() {
50176 #[rustfmt::skip]
50177 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50178 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50179 let b = _mm512_set1_epi32(-1);
50180 let mask = 0b01111010_01111010;
50181 let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
50182 assert_eq!(r, 0b01001010_01001010);
50183 }
50184
50185 #[simd_test(enable = "avx512f,avx512vl")]
50186 unsafe fn test_mm256_cmplt_epu32_mask() {
50187 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
50188 let b = _mm256_set1_epi32(1);
50189 let r = _mm256_cmplt_epu32_mask(a, b);
50190 assert_eq!(r, 0b10000000);
50191 }
50192
50193 #[simd_test(enable = "avx512f,avx512vl")]
50194 unsafe fn test_mm256_mask_cmplt_epu32_mask() {
50195 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
50196 let b = _mm256_set1_epi32(1);
50197 let mask = 0b11111111;
50198 let r = _mm256_mask_cmplt_epu32_mask(mask, a, b);
50199 assert_eq!(r, 0b10000000);
50200 }
50201
50202 #[simd_test(enable = "avx512f,avx512vl")]
50203 unsafe fn test_mm_cmplt_epu32_mask() {
50204 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50205 let b = _mm_set1_epi32(1);
50206 let r = _mm_cmplt_epu32_mask(a, b);
50207 assert_eq!(r, 0b00001000);
50208 }
50209
50210 #[simd_test(enable = "avx512f,avx512vl")]
50211 unsafe fn test_mm_mask_cmplt_epu32_mask() {
50212 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50213 let b = _mm_set1_epi32(1);
50214 let mask = 0b11111111;
50215 let r = _mm_mask_cmplt_epu32_mask(mask, a, b);
50216 assert_eq!(r, 0b00001000);
50217 }
50218
50219 #[simd_test(enable = "avx512f")]
50220 unsafe fn test_mm512_cmpgt_epu32_mask() {
50221 #[rustfmt::skip]
50222 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50223 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50224 let b = _mm512_set1_epi32(-1);
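        // The operands are swapped: b > a (unsigned) is the same predicate as a < b, so the
        // mask matches the cmplt_epu32 test above.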
50225 let m = _mm512_cmpgt_epu32_mask(b, a);
50226 assert_eq!(m, 0b11001111_11001111);
50227 }
50228
50229 #[simd_test(enable = "avx512f")]
50230 unsafe fn test_mm512_mask_cmpgt_epu32_mask() {
50231 #[rustfmt::skip]
50232 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50233 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50234 let b = _mm512_set1_epi32(-1);
50235 let mask = 0b01111010_01111010;
50236 let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
50237 assert_eq!(r, 0b01001010_01001010);
50238 }
50239
50240 #[simd_test(enable = "avx512f,avx512vl")]
50241 unsafe fn test_mm256_cmpgt_epu32_mask() {
50242 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
50243 let b = _mm256_set1_epi32(1);
50244 let r = _mm256_cmpgt_epu32_mask(a, b);
50245 assert_eq!(r, 0b00111111);
50246 }
50247
50248 #[simd_test(enable = "avx512f,avx512vl")]
50249 unsafe fn test_mm256_mask_cmpgt_epu32_mask() {
50250 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
50251 let b = _mm256_set1_epi32(1);
50252 let mask = 0b11111111;
50253 let r = _mm256_mask_cmpgt_epu32_mask(mask, a, b);
50254 assert_eq!(r, 0b00111111);
50255 }
50256
50257 #[simd_test(enable = "avx512f,avx512vl")]
50258 unsafe fn test_mm_cmpgt_epu32_mask() {
50259 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50260 let b = _mm_set1_epi32(1);
50261 let r = _mm_cmpgt_epu32_mask(a, b);
50262 assert_eq!(r, 0b00000011);
50263 }
50264
50265 #[simd_test(enable = "avx512f,avx512vl")]
50266 unsafe fn test_mm_mask_cmpgt_epu32_mask() {
50267 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50268 let b = _mm_set1_epi32(1);
50269 let mask = 0b11111111;
50270 let r = _mm_mask_cmpgt_epu32_mask(mask, a, b);
50271 assert_eq!(r, 0b00000011);
50272 }
50273
50274 #[simd_test(enable = "avx512f")]
50275 unsafe fn test_mm512_cmple_epu32_mask() {
50276 #[rustfmt::skip]
50277 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50278 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50279 let b = _mm512_set1_epi32(-1);
50280 assert_eq!(
50281 _mm512_cmple_epu32_mask(a, b),
50282 !_mm512_cmpgt_epu32_mask(a, b)
50283 )
50284 }
50285
50286 #[simd_test(enable = "avx512f")]
50287 unsafe fn test_mm512_mask_cmple_epu32_mask() {
50288 #[rustfmt::skip]
50289 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50290 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50291 let b = _mm512_set1_epi32(-1);
50292 let mask = 0b01111010_01111010;
50293 assert_eq!(
50294 _mm512_mask_cmple_epu32_mask(mask, a, b),
50295 0b01111010_01111010
50296 );
50297 }
50298
50299 #[simd_test(enable = "avx512f,avx512vl")]
50300 unsafe fn test_mm256_cmple_epu32_mask() {
50301 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
50302 let b = _mm256_set1_epi32(1);
50303 let r = _mm256_cmple_epu32_mask(a, b);
50304 assert_eq!(r, 0b11000000)
50305 }
50306
50307 #[simd_test(enable = "avx512f,avx512vl")]
50308 unsafe fn test_mm256_mask_cmple_epu32_mask() {
50309 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
50310 let b = _mm256_set1_epi32(1);
50311 let mask = 0b11111111;
50312 let r = _mm256_mask_cmple_epu32_mask(mask, a, b);
50313 assert_eq!(r, 0b11000000)
50314 }
50315
50316 #[simd_test(enable = "avx512f,avx512vl")]
50317 unsafe fn test_mm_cmple_epu32_mask() {
50318 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50319 let b = _mm_set1_epi32(1);
50320 let r = _mm_cmple_epu32_mask(a, b);
50321 assert_eq!(r, 0b00001100)
50322 }
50323
50324 #[simd_test(enable = "avx512f,avx512vl")]
50325 unsafe fn test_mm_mask_cmple_epu32_mask() {
50326 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50327 let b = _mm_set1_epi32(1);
50328 let mask = 0b11111111;
50329 let r = _mm_mask_cmple_epu32_mask(mask, a, b);
50330 assert_eq!(r, 0b00001100)
50331 }
50332
50333 #[simd_test(enable = "avx512f")]
50334 unsafe fn test_mm512_cmpge_epu32_mask() {
50335 #[rustfmt::skip]
50336 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50337 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50338 let b = _mm512_set1_epi32(-1);
50339 assert_eq!(
50340 _mm512_cmpge_epu32_mask(a, b),
50341 !_mm512_cmplt_epu32_mask(a, b)
50342 )
50343 }
50344
50345 #[simd_test(enable = "avx512f")]
50346 unsafe fn test_mm512_mask_cmpge_epu32_mask() {
50347 #[rustfmt::skip]
50348 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50349 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50350 let b = _mm512_set1_epi32(-1);
50351 let mask = 0b01111010_01111010;
        assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b00110000_00110000);
50353 }
50354
50355 #[simd_test(enable = "avx512f,avx512vl")]
50356 unsafe fn test_mm256_cmpge_epu32_mask() {
50357 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
50358 let b = _mm256_set1_epi32(1);
50359 let r = _mm256_cmpge_epu32_mask(a, b);
50360 assert_eq!(r, 0b01111111)
50361 }
50362
50363 #[simd_test(enable = "avx512f,avx512vl")]
50364 unsafe fn test_mm256_mask_cmpge_epu32_mask() {
50365 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
50366 let b = _mm256_set1_epi32(1);
50367 let mask = 0b11111111;
50368 let r = _mm256_mask_cmpge_epu32_mask(mask, a, b);
50369 assert_eq!(r, 0b01111111)
50370 }
50371
50372 #[simd_test(enable = "avx512f,avx512vl")]
50373 unsafe fn test_mm_cmpge_epu32_mask() {
50374 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50375 let b = _mm_set1_epi32(1);
50376 let r = _mm_cmpge_epu32_mask(a, b);
50377 assert_eq!(r, 0b00000111)
50378 }
50379
50380 #[simd_test(enable = "avx512f,avx512vl")]
50381 unsafe fn test_mm_mask_cmpge_epu32_mask() {
50382 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50383 let b = _mm_set1_epi32(1);
50384 let mask = 0b11111111;
50385 let r = _mm_mask_cmpge_epu32_mask(mask, a, b);
50386 assert_eq!(r, 0b00000111)
50387 }
50388
50389 #[simd_test(enable = "avx512f")]
50390 unsafe fn test_mm512_cmpeq_epu32_mask() {
50391 #[rustfmt::skip]
50392 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50393 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50394 #[rustfmt::skip]
50395 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50396 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50397 let m = _mm512_cmpeq_epu32_mask(b, a);
50398 assert_eq!(m, 0b11001111_11001111);
50399 }
50400
50401 #[simd_test(enable = "avx512f")]
50402 unsafe fn test_mm512_mask_cmpeq_epu32_mask() {
50403 #[rustfmt::skip]
50404 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50405 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50406 #[rustfmt::skip]
50407 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50408 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50409 let mask = 0b01111010_01111010;
50410 let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
50411 assert_eq!(r, 0b01001010_01001010);
50412 }
50413
50414 #[simd_test(enable = "avx512f,avx512vl")]
50415 unsafe fn test_mm256_cmpeq_epu32_mask() {
50416 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50417 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50418 let m = _mm256_cmpeq_epu32_mask(b, a);
50419 assert_eq!(m, 0b11001111);
50420 }
50421
50422 #[simd_test(enable = "avx512f,avx512vl")]
50423 unsafe fn test_mm256_mask_cmpeq_epu32_mask() {
50424 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50425 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50426 let mask = 0b01111010;
50427 let r = _mm256_mask_cmpeq_epu32_mask(mask, b, a);
50428 assert_eq!(r, 0b01001010);
50429 }
50430
50431 #[simd_test(enable = "avx512f,avx512vl")]
50432 unsafe fn test_mm_cmpeq_epu32_mask() {
50433 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50434 let b = _mm_set_epi32(0, 1, 13, 42);
50435 let m = _mm_cmpeq_epu32_mask(b, a);
50436 assert_eq!(m, 0b00001100);
50437 }
50438
50439 #[simd_test(enable = "avx512f,avx512vl")]
50440 unsafe fn test_mm_mask_cmpeq_epu32_mask() {
50441 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50442 let b = _mm_set_epi32(0, 1, 13, 42);
50443 let mask = 0b11111111;
50444 let r = _mm_mask_cmpeq_epu32_mask(mask, b, a);
50445 assert_eq!(r, 0b00001100);
50446 }
50447
50448 #[simd_test(enable = "avx512f")]
50449 unsafe fn test_mm512_cmpneq_epu32_mask() {
50450 #[rustfmt::skip]
50451 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50452 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50453 #[rustfmt::skip]
50454 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50455 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50456 let m = _mm512_cmpneq_epu32_mask(b, a);
50457 assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
50458 }
50459
50460 #[simd_test(enable = "avx512f")]
50461 unsafe fn test_mm512_mask_cmpneq_epu32_mask() {
50462 #[rustfmt::skip]
50463 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
50464 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50465 #[rustfmt::skip]
50466 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50467 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50468 let mask = 0b01111010_01111010;
50469 let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
50470 assert_eq!(r, 0b00110010_00110010);
50471 }
50472
50473 #[simd_test(enable = "avx512f,avx512vl")]
50474 unsafe fn test_mm256_cmpneq_epu32_mask() {
50475 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50476 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
50477 let r = _mm256_cmpneq_epu32_mask(b, a);
50478 assert_eq!(r, 0b00110000);
50479 }
50480
50481 #[simd_test(enable = "avx512f,avx512vl")]
50482 unsafe fn test_mm256_mask_cmpneq_epu32_mask() {
50483 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50484 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
50485 let mask = 0b11111111;
50486 let r = _mm256_mask_cmpneq_epu32_mask(mask, b, a);
50487 assert_eq!(r, 0b00110000);
50488 }
50489
50490 #[simd_test(enable = "avx512f,avx512vl")]
50491 unsafe fn test_mm_cmpneq_epu32_mask() {
50492 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50493 let b = _mm_set_epi32(0, 1, 13, 42);
50494 let r = _mm_cmpneq_epu32_mask(b, a);
50495 assert_eq!(r, 0b00000011);
50496 }
50497
50498 #[simd_test(enable = "avx512f,avx512vl")]
50499 unsafe fn test_mm_mask_cmpneq_epu32_mask() {
50500 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50501 let b = _mm_set_epi32(0, 1, 13, 42);
50502 let mask = 0b11111111;
50503 let r = _mm_mask_cmpneq_epu32_mask(mask, b, a);
50504 assert_eq!(r, 0b00000011);
50505 }
50506
50507 #[simd_test(enable = "avx512f")]
50508 unsafe fn test_mm512_cmp_epu32_mask() {
50509 #[rustfmt::skip]
50510 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50511 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50512 let b = _mm512_set1_epi32(-1);
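        // _MM_CMPINT_LT selects an unsigned less-than for the epu32 form, so b acts as u32::MAX here.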
50513 let m = _mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50514 assert_eq!(m, 0b11001111_11001111);
50515 }
50516
50517 #[simd_test(enable = "avx512f")]
50518 unsafe fn test_mm512_mask_cmp_epu32_mask() {
50519 #[rustfmt::skip]
50520 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50521 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50522 let b = _mm512_set1_epi32(-1);
50523 let mask = 0b01111010_01111010;
50524 let r = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50525 assert_eq!(r, 0b01001010_01001010);
50526 }
50527
50528 #[simd_test(enable = "avx512f,avx512vl")]
50529 unsafe fn test_mm256_cmp_epu32_mask() {
50530 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50531 let b = _mm256_set1_epi32(-1);
50532 let m = _mm256_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50533 assert_eq!(m, 0b11001111);
50534 }
50535
50536 #[simd_test(enable = "avx512f,avx512vl")]
50537 unsafe fn test_mm256_mask_cmp_epu32_mask() {
50538 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50539 let b = _mm256_set1_epi32(-1);
50540 let mask = 0b11111111;
50541 let r = _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50542 assert_eq!(r, 0b11001111);
50543 }
50544
50545 #[simd_test(enable = "avx512f,avx512vl")]
50546 unsafe fn test_mm_cmp_epu32_mask() {
50547 let a = _mm_set_epi32(0, 1, -1, i32::MAX);
50548 let b = _mm_set1_epi32(1);
50549 let m = _mm_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50550 assert_eq!(m, 0b00001000);
50551 }
50552
50553 #[simd_test(enable = "avx512f,avx512vl")]
50554 unsafe fn test_mm_mask_cmp_epu32_mask() {
50555 let a = _mm_set_epi32(0, 1, -1, i32::MAX);
50556 let b = _mm_set1_epi32(1);
50557 let mask = 0b11111111;
50558 let r = _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50559 assert_eq!(r, 0b00001000);
50560 }
50561
50562 #[simd_test(enable = "avx512f")]
50563 unsafe fn test_mm512_cmplt_epi32_mask() {
50564 #[rustfmt::skip]
50565 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50566 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50567 let b = _mm512_set1_epi32(-1);
50568 let m = _mm512_cmplt_epi32_mask(a, b);
50569 assert_eq!(m, 0b00000101_00000101);
50570 }
50571
50572 #[simd_test(enable = "avx512f")]
50573 unsafe fn test_mm512_mask_cmplt_epi32_mask() {
50574 #[rustfmt::skip]
50575 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50576 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50577 let b = _mm512_set1_epi32(-1);
50578 let mask = 0b01100110_01100110;
50579 let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
50580 assert_eq!(r, 0b00000100_00000100);
50581 }
50582
50583 #[simd_test(enable = "avx512f,avx512vl")]
50584 unsafe fn test_mm256_cmplt_epi32_mask() {
50585 let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
50586 let b = _mm256_set1_epi32(-1);
50587 let r = _mm256_cmplt_epi32_mask(a, b);
50588 assert_eq!(r, 0b00000101);
50589 }
50590
50591 #[simd_test(enable = "avx512f,avx512vl")]
50592 unsafe fn test_mm256_mask_cmplt_epi32_mask() {
50593 let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
50594 let b = _mm256_set1_epi32(-1);
50595 let mask = 0b11111111;
50596 let r = _mm256_mask_cmplt_epi32_mask(mask, a, b);
50597 assert_eq!(r, 0b00000101);
50598 }
50599
50600 #[simd_test(enable = "avx512f,avx512vl")]
50601 unsafe fn test_mm_cmplt_epi32_mask() {
50602 let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
50603 let b = _mm_set1_epi32(-1);
50604 let r = _mm_cmplt_epi32_mask(a, b);
50605 assert_eq!(r, 0b00000101);
50606 }
50607
50608 #[simd_test(enable = "avx512f,avx512vl")]
50609 unsafe fn test_mm_mask_cmplt_epi32_mask() {
50610 let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
50611 let b = _mm_set1_epi32(-1);
50612 let mask = 0b11111111;
50613 let r = _mm_mask_cmplt_epi32_mask(mask, a, b);
50614 assert_eq!(r, 0b00000101);
50615 }
50616
50617 #[simd_test(enable = "avx512f")]
50618 unsafe fn test_mm512_cmpgt_epi32_mask() {
50619 #[rustfmt::skip]
50620 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50621 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50622 let b = _mm512_set1_epi32(-1);
50623 let m = _mm512_cmpgt_epi32_mask(b, a);
50624 assert_eq!(m, 0b00000101_00000101);
50625 }
50626
50627 #[simd_test(enable = "avx512f")]
50628 unsafe fn test_mm512_mask_cmpgt_epi32_mask() {
50629 #[rustfmt::skip]
50630 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50631 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50632 let b = _mm512_set1_epi32(-1);
50633 let mask = 0b01100110_01100110;
50634 let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
50635 assert_eq!(r, 0b00000100_00000100);
50636 }
50637
50638 #[simd_test(enable = "avx512f,avx512vl")]
50639 unsafe fn test_mm256_cmpgt_epi32_mask() {
50640 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50641 let b = _mm256_set1_epi32(-1);
50642 let r = _mm256_cmpgt_epi32_mask(a, b);
50643 assert_eq!(r, 0b11011010);
50644 }
50645
50646 #[simd_test(enable = "avx512f,avx512vl")]
50647 unsafe fn test_mm256_mask_cmpgt_epi32_mask() {
50648 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50649 let b = _mm256_set1_epi32(-1);
50650 let mask = 0b11111111;
50651 let r = _mm256_mask_cmpgt_epi32_mask(mask, a, b);
50652 assert_eq!(r, 0b11011010);
50653 }
50654
50655 #[simd_test(enable = "avx512f,avx512vl")]
50656 unsafe fn test_mm_cmpgt_epi32_mask() {
50657 let a = _mm_set_epi32(0, 1, -1, 13);
50658 let b = _mm_set1_epi32(-1);
50659 let r = _mm_cmpgt_epi32_mask(a, b);
50660 assert_eq!(r, 0b00001101);
50661 }
50662
50663 #[simd_test(enable = "avx512f,avx512vl")]
50664 unsafe fn test_mm_mask_cmpgt_epi32_mask() {
50665 let a = _mm_set_epi32(0, 1, -1, 13);
50666 let b = _mm_set1_epi32(-1);
50667 let mask = 0b11111111;
50668 let r = _mm_mask_cmpgt_epi32_mask(mask, a, b);
50669 assert_eq!(r, 0b00001101);
50670 }
50671
50672 #[simd_test(enable = "avx512f")]
50673 unsafe fn test_mm512_cmple_epi32_mask() {
50674 #[rustfmt::skip]
50675 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50676 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50677 let b = _mm512_set1_epi32(-1);
50678 assert_eq!(
50679 _mm512_cmple_epi32_mask(a, b),
50680 !_mm512_cmpgt_epi32_mask(a, b)
50681 )
50682 }
50683
50684 #[simd_test(enable = "avx512f")]
50685 unsafe fn test_mm512_mask_cmple_epi32_mask() {
50686 #[rustfmt::skip]
50687 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50688 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50689 let b = _mm512_set1_epi32(-1);
50690 let mask = 0b01111010_01111010;
        assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b00110000_00110000);
50692 }
50693
50694 #[simd_test(enable = "avx512f,avx512vl")]
50695 unsafe fn test_mm256_cmple_epi32_mask() {
50696 let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
50697 let b = _mm256_set1_epi32(-1);
50698 let r = _mm256_cmple_epi32_mask(a, b);
50699 assert_eq!(r, 0b00100101)
50700 }
50701
50702 #[simd_test(enable = "avx512f,avx512vl")]
50703 unsafe fn test_mm256_mask_cmple_epi32_mask() {
50704 let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
50705 let b = _mm256_set1_epi32(-1);
50706 let mask = 0b11111111;
50707 let r = _mm256_mask_cmple_epi32_mask(mask, a, b);
50708 assert_eq!(r, 0b00100101)
50709 }
50710
50711 #[simd_test(enable = "avx512f,avx512vl")]
50712 unsafe fn test_mm_cmple_epi32_mask() {
50713 let a = _mm_set_epi32(0, 1, -1, 200);
50714 let b = _mm_set1_epi32(-1);
50715 let r = _mm_cmple_epi32_mask(a, b);
50716 assert_eq!(r, 0b00000010)
50717 }
50718
50719 #[simd_test(enable = "avx512f,avx512vl")]
50720 unsafe fn test_mm_mask_cmple_epi32_mask() {
50721 let a = _mm_set_epi32(0, 1, -1, 200);
50722 let b = _mm_set1_epi32(-1);
50723 let mask = 0b11111111;
50724 let r = _mm_mask_cmple_epi32_mask(mask, a, b);
50725 assert_eq!(r, 0b00000010)
50726 }
50727
50728 #[simd_test(enable = "avx512f")]
50729 unsafe fn test_mm512_cmpge_epi32_mask() {
50730 #[rustfmt::skip]
50731 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50732 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50733 let b = _mm512_set1_epi32(-1);
50734 assert_eq!(
50735 _mm512_cmpge_epi32_mask(a, b),
50736 !_mm512_cmplt_epi32_mask(a, b)
50737 )
50738 }
50739
50740 #[simd_test(enable = "avx512f")]
50741 unsafe fn test_mm512_mask_cmpge_epi32_mask() {
50742 #[rustfmt::skip]
50743 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50744 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50745 let b = _mm512_set1_epi32(-1);
50746 let mask = 0b01111010_01111010;
50747 assert_eq!(
50748 _mm512_mask_cmpge_epi32_mask(mask, a, b),
50749 0b01111010_01111010
50750 );
50751 }
50752
50753 #[simd_test(enable = "avx512f,avx512vl")]
50754 unsafe fn test_mm256_cmpge_epi32_mask() {
50755 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50756 let b = _mm256_set1_epi32(-1);
50757 let r = _mm256_cmpge_epi32_mask(a, b);
50758 assert_eq!(r, 0b11111010)
50759 }
50760
50761 #[simd_test(enable = "avx512f,avx512vl")]
50762 unsafe fn test_mm256_mask_cmpge_epi32_mask() {
50763 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50764 let b = _mm256_set1_epi32(-1);
50765 let mask = 0b11111111;
50766 let r = _mm256_mask_cmpge_epi32_mask(mask, a, b);
50767 assert_eq!(r, 0b11111010)
50768 }
50769
50770 #[simd_test(enable = "avx512f,avx512vl")]
50771 unsafe fn test_mm_cmpge_epi32_mask() {
50772 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50773 let b = _mm_set1_epi32(-1);
50774 let r = _mm_cmpge_epi32_mask(a, b);
50775 assert_eq!(r, 0b00001111)
50776 }
50777
50778 #[simd_test(enable = "avx512f,avx512vl")]
50779 unsafe fn test_mm_mask_cmpge_epi32_mask() {
50780 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50781 let b = _mm_set1_epi32(-1);
50782 let mask = 0b11111111;
50783 let r = _mm_mask_cmpge_epi32_mask(mask, a, b);
50784 assert_eq!(r, 0b00001111)
50785 }
50786
50787 #[simd_test(enable = "avx512f")]
50788 unsafe fn test_mm512_cmpeq_epi32_mask() {
50789 #[rustfmt::skip]
50790 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50791 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50792 #[rustfmt::skip]
50793 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50794 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50795 let m = _mm512_cmpeq_epi32_mask(b, a);
50796 assert_eq!(m, 0b11001111_11001111);
50797 }
50798
50799 #[simd_test(enable = "avx512f")]
50800 unsafe fn test_mm512_mask_cmpeq_epi32_mask() {
50801 #[rustfmt::skip]
50802 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50803 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50804 #[rustfmt::skip]
50805 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50806 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50807 let mask = 0b01111010_01111010;
50808 let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
50809 assert_eq!(r, 0b01001010_01001010);
50810 }
50811
50812 #[simd_test(enable = "avx512f,avx512vl")]
50813 unsafe fn test_mm256_cmpeq_epi32_mask() {
50814 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50815 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50816 let m = _mm256_cmpeq_epi32_mask(b, a);
50817 assert_eq!(m, 0b11001111);
50818 }
50819
50820 #[simd_test(enable = "avx512f,avx512vl")]
50821 unsafe fn test_mm256_mask_cmpeq_epi32_mask() {
50822 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50823 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50824 let mask = 0b01111010;
50825 let r = _mm256_mask_cmpeq_epi32_mask(mask, b, a);
50826 assert_eq!(r, 0b01001010);
50827 }
50828
50829 #[simd_test(enable = "avx512f,avx512vl")]
50830 unsafe fn test_mm_cmpeq_epi32_mask() {
50831 let a = _mm_set_epi32(0, 1, -1, 13);
50832 let b = _mm_set_epi32(0, 1, 13, 42);
50833 let m = _mm_cmpeq_epi32_mask(b, a);
50834 assert_eq!(m, 0b00001100);
50835 }
50836
50837 #[simd_test(enable = "avx512f,avx512vl")]
50838 unsafe fn test_mm_mask_cmpeq_epi32_mask() {
50839 let a = _mm_set_epi32(0, 1, -1, 13);
50840 let b = _mm_set_epi32(0, 1, 13, 42);
50841 let mask = 0b11111111;
50842 let r = _mm_mask_cmpeq_epi32_mask(mask, b, a);
50843 assert_eq!(r, 0b00001100);
50844 }
50845
50846 #[simd_test(enable = "avx512f")]
50847 unsafe fn test_mm512_cmpneq_epi32_mask() {
50848 #[rustfmt::skip]
50849 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50850 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50851 #[rustfmt::skip]
50852 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50853 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50854 let m = _mm512_cmpneq_epi32_mask(b, a);
50855 assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
50856 }
50857
50858 #[simd_test(enable = "avx512f")]
50859 unsafe fn test_mm512_mask_cmpneq_epi32_mask() {
50860 #[rustfmt::skip]
50861 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
50862 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
50863 #[rustfmt::skip]
50864 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50865 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50866 let mask = 0b01111010_01111010;
50867 let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
50868 assert_eq!(r, 0b00110010_00110010)
50869 }
50870
50871 #[simd_test(enable = "avx512f,avx512vl")]
50872 unsafe fn test_mm256_cmpneq_epi32_mask() {
50873 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50874 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50875 let m = _mm256_cmpneq_epi32_mask(b, a);
50876 assert_eq!(m, !_mm256_cmpeq_epi32_mask(b, a));
50877 }
50878
50879 #[simd_test(enable = "avx512f,avx512vl")]
50880 unsafe fn test_mm256_mask_cmpneq_epi32_mask() {
50881 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
50882 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50883 let mask = 0b11111111;
50884 let r = _mm256_mask_cmpneq_epi32_mask(mask, b, a);
50885 assert_eq!(r, 0b00110011)
50886 }
50887
50888 #[simd_test(enable = "avx512f,avx512vl")]
50889 unsafe fn test_mm_cmpneq_epi32_mask() {
50890 let a = _mm_set_epi32(0, 1, -1, 13);
50891 let b = _mm_set_epi32(0, 1, 13, 42);
50892 let r = _mm_cmpneq_epi32_mask(b, a);
50893 assert_eq!(r, 0b00000011)
50894 }
50895
50896 #[simd_test(enable = "avx512f,avx512vl")]
50897 unsafe fn test_mm_mask_cmpneq_epi32_mask() {
50898 let a = _mm_set_epi32(0, 1, -1, 13);
50899 let b = _mm_set_epi32(0, 1, 13, 42);
50900 let mask = 0b11111111;
50901 let r = _mm_mask_cmpneq_epi32_mask(mask, b, a);
50902 assert_eq!(r, 0b00000011)
50903 }
50904
50905 #[simd_test(enable = "avx512f")]
50906 unsafe fn test_mm512_cmp_epi32_mask() {
50907 #[rustfmt::skip]
50908 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50909 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50910 let b = _mm512_set1_epi32(-1);
50911 let m = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50912 assert_eq!(m, 0b00000101_00000101);
50913 }
50914
50915 #[simd_test(enable = "avx512f")]
50916 unsafe fn test_mm512_mask_cmp_epi32_mask() {
50917 #[rustfmt::skip]
50918 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50919 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50920 let b = _mm512_set1_epi32(-1);
50921 let mask = 0b01100110_01100110;
50922 let r = _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50923 assert_eq!(r, 0b00000100_00000100);
50924 }
50925
50926 #[simd_test(enable = "avx512f,avx512vl")]
50927 unsafe fn test_mm256_cmp_epi32_mask() {
50928 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50929 let b = _mm256_set1_epi32(-1);
50930 let m = _mm256_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50931 assert_eq!(m, 0b00000101);
50932 }
50933
50934 #[simd_test(enable = "avx512f,avx512vl")]
50935 unsafe fn test_mm256_mask_cmp_epi32_mask() {
50936 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50937 let b = _mm256_set1_epi32(-1);
50938 let mask = 0b01100110;
50939 let r = _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50940 assert_eq!(r, 0b00000100);
50941 }
50942
50943 #[simd_test(enable = "avx512f,avx512vl")]
50944 unsafe fn test_mm_cmp_epi32_mask() {
50945 let a = _mm_set_epi32(0, 1, -1, 13);
50946 let b = _mm_set1_epi32(1);
50947 let m = _mm_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50948 assert_eq!(m, 0b00001010);
50949 }
50950
50951 #[simd_test(enable = "avx512f,avx512vl")]
50952 unsafe fn test_mm_mask_cmp_epi32_mask() {
50953 let a = _mm_set_epi32(0, 1, -1, 13);
50954 let b = _mm_set1_epi32(1);
50955 let mask = 0b11111111;
50956 let r = _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50957 assert_eq!(r, 0b00001010);
50958 }
50959
50960 #[simd_test(enable = "avx512f")]
50961 unsafe fn test_mm512_set_epi8() {
50962 let r = _mm512_set1_epi8(2);
50963 assert_eq_m512i(
50964 r,
50965 _mm512_set_epi8(
50966 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50967 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50968 2, 2, 2, 2, 2, 2, 2, 2,
50969 ),
50970 )
50971 }
50972
50973 #[simd_test(enable = "avx512f")]
50974 unsafe fn test_mm512_set_epi16() {
50975 let r = _mm512_set1_epi16(2);
50976 assert_eq_m512i(
50977 r,
50978 _mm512_set_epi16(
50979 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50980 2, 2, 2, 2,
50981 ),
50982 )
50983 }
50984
50985 #[simd_test(enable = "avx512f")]
50986 unsafe fn test_mm512_set_epi32() {
50987 let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
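        // _mm512_set_epi32 takes its arguments from the highest lane down, while _mm512_setr_epi32
        // fills from lane 0 up, so reversing the argument order produces the same vector.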
50988 assert_eq_m512i(
50989 r,
50990 _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
50991 )
50992 }
50993
50994 #[simd_test(enable = "avx512f")]
50995 unsafe fn test_mm512_setr_epi32() {
50996 let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50997 assert_eq_m512i(
50998 r,
50999 _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
51000 )
51001 }
51002
51003 #[simd_test(enable = "avx512f")]
51004 unsafe fn test_mm512_set1_epi8() {
51005 let r = _mm512_set_epi8(
51006 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
51007 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
51008 2, 2, 2, 2, 2, 2,
51009 );
51010 assert_eq_m512i(r, _mm512_set1_epi8(2));
51011 }
51012
51013 #[simd_test(enable = "avx512f")]
51014 unsafe fn test_mm512_set1_epi16() {
51015 let r = _mm512_set_epi16(
51016 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
51017 2, 2, 2,
51018 );
51019 assert_eq_m512i(r, _mm512_set1_epi16(2));
51020 }
51021
51022 #[simd_test(enable = "avx512f")]
51023 unsafe fn test_mm512_set1_epi32() {
51024 let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
51025 assert_eq_m512i(r, _mm512_set1_epi32(2));
51026 }
51027
51028 #[simd_test(enable = "avx512f")]
51029 unsafe fn test_mm512_setzero_si512() {
51030 assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
51031 }
51032
51033 #[simd_test(enable = "avx512f")]
51034 unsafe fn test_mm512_setzero_epi32() {
51035 assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32());
51036 }
51037
51038 #[simd_test(enable = "avx512f")]
51039 unsafe fn test_mm512_set_ps() {
51040 let r = _mm512_setr_ps(
51041 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51042 );
51043 assert_eq_m512(
51044 r,
51045 _mm512_set_ps(
51046 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
51047 ),
51048 )
51049 }
51050
51051 #[simd_test(enable = "avx512f")]
51052 unsafe fn test_mm512_setr_ps() {
51053 let r = _mm512_set_ps(
51054 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51055 );
51056 assert_eq_m512(
51057 r,
51058 _mm512_setr_ps(
51059 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
51060 ),
51061 )
51062 }
51063
51064 #[simd_test(enable = "avx512f")]
51065 unsafe fn test_mm512_set1_ps() {
51066 #[rustfmt::skip]
51067 let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
51068 2., 2., 2., 2., 2., 2., 2., 2.);
51069 assert_eq_m512(expected, _mm512_set1_ps(2.));
51070 }
51071
51072 #[simd_test(enable = "avx512f")]
51073 unsafe fn test_mm512_set4_epi32() {
51074 let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
51075 assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1));
51076 }
51077
51078 #[simd_test(enable = "avx512f")]
51079 unsafe fn test_mm512_set4_ps() {
51080 let r = _mm512_set_ps(
51081 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
51082 );
51083 assert_eq_m512(r, _mm512_set4_ps(4., 3., 2., 1.));
51084 }
51085
51086 #[simd_test(enable = "avx512f")]
51087 unsafe fn test_mm512_setr4_epi32() {
51088 let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
51089 assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4));
51090 }
51091
51092 #[simd_test(enable = "avx512f")]
51093 unsafe fn test_mm512_setr4_ps() {
51094 let r = _mm512_set_ps(
51095 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
51096 );
51097 assert_eq_m512(r, _mm512_setr4_ps(1., 2., 3., 4.));
51098 }
51099
51100 #[simd_test(enable = "avx512f")]
51101 unsafe fn test_mm512_setzero_ps() {
51102 assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
51103 }
51104
51105 #[simd_test(enable = "avx512f")]
51106 unsafe fn test_mm512_setzero() {
51107 assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.));
51108 }
51109
51110 #[simd_test(enable = "avx512f")]
51111 unsafe fn test_mm512_loadu_pd() {
51112 let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
51113 let p = a.as_ptr();
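// black_box hides the pointer from the optimizer so the load is actually performed at runtime.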
51114 let r = _mm512_loadu_pd(black_box(p));
51115 let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
51116 assert_eq_m512d(r, e);
51117 }
51118
51119 #[simd_test(enable = "avx512f")]
51120 unsafe fn test_mm512_storeu_pd() {
51121 let a = _mm512_set1_pd(9.);
51122 let mut r = _mm512_undefined_pd();
51123 _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
51124 assert_eq_m512d(r, a);
51125 }
51126
51127 #[simd_test(enable = "avx512f")]
51128 unsafe fn test_mm512_loadu_ps() {
51129 let a = &[
51130 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
51131 ];
51132 let p = a.as_ptr();
51133 let r = _mm512_loadu_ps(black_box(p));
51134 let e = _mm512_setr_ps(
51135 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
51136 );
51137 assert_eq_m512(r, e);
51138 }
51139
51140 #[simd_test(enable = "avx512f")]
51141 unsafe fn test_mm512_storeu_ps() {
51142 let a = _mm512_set1_ps(9.);
51143 let mut r = _mm512_undefined_ps();
51144 _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
51145 assert_eq_m512(r, a);
51146 }
51147
51148 #[simd_test(enable = "avx512f")]
51149 unsafe fn test_mm512_mask_loadu_epi32() {
51150 let src = _mm512_set1_epi32(42);
51151 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
51152 let p = a.as_ptr();
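// Mask bit i controls element i (least-significant bit = lowest element); clear bits keep the value from `src`.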
51153 let m = 0b11101000_11001010;
51154 let r = _mm512_mask_loadu_epi32(src, m, black_box(p));
51155 let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51156 assert_eq_m512i(r, e);
51157 }
51158
51159 #[simd_test(enable = "avx512f")]
51160 unsafe fn test_mm512_maskz_loadu_epi32() {
51161 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
51162 let p = a.as_ptr();
51163 let m = 0b11101000_11001010;
51164 let r = _mm512_maskz_loadu_epi32(m, black_box(p));
51165 let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
51166 assert_eq_m512i(r, e);
51167 }
51168
51169 #[simd_test(enable = "avx512f")]
51170 unsafe fn test_mm512_mask_load_epi32() {
51171 #[repr(align(64))]
51172 struct Align {
51173 data: [i32; 16], // 64 bytes
51174 }
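// The aligned load requires a 64-byte-aligned pointer, hence the wrapper struct.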
51175 let src = _mm512_set1_epi32(42);
51176 let a = Align {
51177 data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
51178 };
51179 let p = a.data.as_ptr();
51180 let m = 0b11101000_11001010;
51181 let r = _mm512_mask_load_epi32(src, m, black_box(p));
51182 let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51183 assert_eq_m512i(r, e);
51184 }
51185
51186 #[simd_test(enable = "avx512f")]
51187 unsafe fn test_mm512_maskz_load_epi32() {
51188 #[repr(align(64))]
51189 struct Align {
51190 data: [i32; 16], // 64 bytes
51191 }
51192 let a = Align {
51193 data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
51194 };
51195 let p = a.data.as_ptr();
51196 let m = 0b11101000_11001010;
51197 let r = _mm512_maskz_load_epi32(m, black_box(p));
51198 let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
51199 assert_eq_m512i(r, e);
51200 }
51201
51202 #[simd_test(enable = "avx512f")]
51203 unsafe fn test_mm512_mask_storeu_epi32() {
51204 let mut r = [42_i32; 16];
51205 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51206 let m = 0b11101000_11001010;
51207 _mm512_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51208 let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51209 assert_eq_m512i(_mm512_loadu_epi32(r.as_ptr()), e);
51210 }
51211
51212 #[simd_test(enable = "avx512f")]
51213 unsafe fn test_mm512_mask_store_epi32() {
51214 #[repr(align(64))]
51215 struct Align {
51216 data: [i32; 16],
51217 }
51218 let mut r = Align { data: [42; 16] };
51219 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51220 let m = 0b11101000_11001010;
51221 _mm512_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51222 let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51223 assert_eq_m512i(_mm512_load_epi32(r.data.as_ptr()), e);
51224 }
51225
51226 #[simd_test(enable = "avx512f")]
51227 unsafe fn test_mm512_mask_loadu_epi64() {
51228 let src = _mm512_set1_epi64(42);
51229 let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
51230 let p = a.as_ptr();
51231 let m = 0b11001010;
51232 let r = _mm512_mask_loadu_epi64(src, m, black_box(p));
51233 let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51234 assert_eq_m512i(r, e);
51235 }
51236
51237 #[simd_test(enable = "avx512f")]
51238 unsafe fn test_mm512_maskz_loadu_epi64() {
51239 let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
51240 let p = a.as_ptr();
51241 let m = 0b11001010;
51242 let r = _mm512_maskz_loadu_epi64(m, black_box(p));
51243 let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
51244 assert_eq_m512i(r, e);
51245 }
51246
51247 #[simd_test(enable = "avx512f")]
51248 unsafe fn test_mm512_mask_load_epi64() {
51249 #[repr(align(64))]
51250 struct Align {
51251 data: [i64; 8], // 64 bytes
51252 }
51253 let src = _mm512_set1_epi64(42);
51254 let a = Align {
51255 data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
51256 };
51257 let p = a.data.as_ptr();
51258 let m = 0b11001010;
51259 let r = _mm512_mask_load_epi64(src, m, black_box(p));
51260 let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51261 assert_eq_m512i(r, e);
51262 }
51263
51264 #[simd_test(enable = "avx512f")]
51265 unsafe fn test_mm512_maskz_load_epi64() {
51266 #[repr(align(64))]
51267 struct Align {
51268 data: [i64; 8], // 64 bytes
51269 }
51270 let a = Align {
51271 data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
51272 };
51273 let p = a.data.as_ptr();
51274 let m = 0b11001010;
51275 let r = _mm512_maskz_load_epi64(m, black_box(p));
51276 let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
51277 assert_eq_m512i(r, e);
51278 }
51279
51280 #[simd_test(enable = "avx512f")]
51281 unsafe fn test_mm512_mask_storeu_epi64() {
51282 let mut r = [42_i64; 8];
51283 let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
51284 let m = 0b11001010;
51285 _mm512_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51286 let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51287 assert_eq_m512i(_mm512_loadu_epi64(r.as_ptr()), e);
51288 }
51289
51290 #[simd_test(enable = "avx512f")]
51291 unsafe fn test_mm512_mask_store_epi64() {
51292 #[repr(align(64))]
51293 struct Align {
51294 data: [i64; 8],
51295 }
51296 let mut r = Align { data: [42; 8] };
51297 let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
51298 let m = 0b11001010;
51299 let p = r.data.as_mut_ptr();
51300 _mm512_mask_store_epi64(p, m, a);
51301 let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51302 assert_eq_m512i(_mm512_load_epi64(r.data.as_ptr()), e);
51303 }
51304
51305 #[simd_test(enable = "avx512f")]
51306 unsafe fn test_mm512_mask_loadu_ps() {
51307 let src = _mm512_set1_ps(42.0);
51308 let a = &[
51309 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
51310 16.0,
51311 ];
51312 let p = a.as_ptr();
51313 let m = 0b11101000_11001010;
51314 let r = _mm512_mask_loadu_ps(src, m, black_box(p));
51315 let e = _mm512_setr_ps(
51316 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51317 16.0,
51318 );
51319 assert_eq_m512(r, e);
51320 }
51321
51322 #[simd_test(enable = "avx512f")]
51323 unsafe fn test_mm512_maskz_loadu_ps() {
51324 let a = &[
51325 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
51326 16.0,
51327 ];
51328 let p = a.as_ptr();
51329 let m = 0b11101000_11001010;
51330 let r = _mm512_maskz_loadu_ps(m, black_box(p));
51331 let e = _mm512_setr_ps(
51332 0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
51333 );
51334 assert_eq_m512(r, e);
51335 }
51336
51337 #[simd_test(enable = "avx512f")]
51338 unsafe fn test_mm512_mask_load_ps() {
51339 #[repr(align(64))]
51340 struct Align {
51341 data: [f32; 16], // 64 bytes
51342 }
51343 let src = _mm512_set1_ps(42.0);
51344 let a = Align {
51345 data: [
51346 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
51347 15.0, 16.0,
51348 ],
51349 };
51350 let p = a.data.as_ptr();
51351 let m = 0b11101000_11001010;
51352 let r = _mm512_mask_load_ps(src, m, black_box(p));
51353 let e = _mm512_setr_ps(
51354 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51355 16.0,
51356 );
51357 assert_eq_m512(r, e);
51358 }
51359
51360 #[simd_test(enable = "avx512f")]
51361 unsafe fn test_mm512_maskz_load_ps() {
51362 #[repr(align(64))]
51363 struct Align {
51364 data: [f32; 16], // 64 bytes
51365 }
51366 let a = Align {
51367 data: [
51368 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
51369 15.0, 16.0,
51370 ],
51371 };
51372 let p = a.data.as_ptr();
51373 let m = 0b11101000_11001010;
51374 let r = _mm512_maskz_load_ps(m, black_box(p));
51375 let e = _mm512_setr_ps(
51376 0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
51377 );
51378 assert_eq_m512(r, e);
51379 }
51380
51381 #[simd_test(enable = "avx512f")]
51382 unsafe fn test_mm512_mask_storeu_ps() {
51383 let mut r = [42_f32; 16];
51384 let a = _mm512_setr_ps(
51385 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
51386 );
51387 let m = 0b11101000_11001010;
51388 _mm512_mask_storeu_ps(r.as_mut_ptr(), m, a);
51389 let e = _mm512_setr_ps(
51390 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51391 16.0,
51392 );
51393 assert_eq_m512(_mm512_loadu_ps(r.as_ptr()), e);
51394 }
51395
51396 #[simd_test(enable = "avx512f")]
51397 unsafe fn test_mm512_mask_store_ps() {
51398 #[repr(align(64))]
51399 struct Align {
51400 data: [f32; 16],
51401 }
51402 let mut r = Align { data: [42.0; 16] };
51403 let a = _mm512_setr_ps(
51404 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
51405 );
51406 let m = 0b11101000_11001010;
51407 _mm512_mask_store_ps(r.data.as_mut_ptr(), m, a);
51408 let e = _mm512_setr_ps(
51409 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51410 16.0,
51411 );
51412 assert_eq_m512(_mm512_load_ps(r.data.as_ptr()), e);
51413 }
51414
51415 #[simd_test(enable = "avx512f")]
51416 unsafe fn test_mm512_mask_loadu_pd() {
51417 let src = _mm512_set1_pd(42.0);
51418 let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51419 let p = a.as_ptr();
51420 let m = 0b11001010;
51421 let r = _mm512_mask_loadu_pd(src, m, black_box(p));
51422 let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51423 assert_eq_m512d(r, e);
51424 }
51425
51426 #[simd_test(enable = "avx512f")]
51427 unsafe fn test_mm512_maskz_loadu_pd() {
51428 let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51429 let p = a.as_ptr();
51430 let m = 0b11001010;
51431 let r = _mm512_maskz_loadu_pd(m, black_box(p));
51432 let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51433 assert_eq_m512d(r, e);
51434 }
51435
51436 #[simd_test(enable = "avx512f")]
51437 unsafe fn test_mm512_mask_load_pd() {
51438 #[repr(align(64))]
51439 struct Align {
51440 data: [f64; 8], // 64 bytes
51441 }
51442 let src = _mm512_set1_pd(42.0);
51443 let a = Align {
51444 data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51445 };
51446 let p = a.data.as_ptr();
51447 let m = 0b11001010;
51448 let r = _mm512_mask_load_pd(src, m, black_box(p));
51449 let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51450 assert_eq_m512d(r, e);
51451 }
51452
51453 #[simd_test(enable = "avx512f")]
51454 unsafe fn test_mm512_maskz_load_pd() {
51455 #[repr(align(64))]
51456 struct Align {
51457 data: [f64; 8], // 64 bytes
51458 }
51459 let a = Align {
51460 data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51461 };
51462 let p = a.data.as_ptr();
51463 let m = 0b11001010;
51464 let r = _mm512_maskz_load_pd(m, black_box(p));
51465 let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51466 assert_eq_m512d(r, e);
51467 }
51468
51469 #[simd_test(enable = "avx512f")]
51470 unsafe fn test_mm512_mask_storeu_pd() {
51471 let mut r = [42_f64; 8];
51472 let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51473 let m = 0b11001010;
51474 _mm512_mask_storeu_pd(r.as_mut_ptr(), m, a);
51475 let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51476 assert_eq_m512d(_mm512_loadu_pd(r.as_ptr()), e);
51477 }
51478
51479 #[simd_test(enable = "avx512f")]
51480 unsafe fn test_mm512_mask_store_pd() {
51481 #[repr(align(64))]
51482 struct Align {
51483 data: [f64; 8],
51484 }
51485 let mut r = Align { data: [42.0; 8] };
51486 let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51487 let m = 0b11001010;
51488 _mm512_mask_store_pd(r.data.as_mut_ptr(), m, a);
51489 let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51490 assert_eq_m512d(_mm512_load_pd(r.data.as_ptr()), e);
51491 }
51492
51493 #[simd_test(enable = "avx512f,avx512vl")]
51494 unsafe fn test_mm256_mask_loadu_epi32() {
51495 let src = _mm256_set1_epi32(42);
51496 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
51497 let p = a.as_ptr();
51498 let m = 0b11001010;
51499 let r = _mm256_mask_loadu_epi32(src, m, black_box(p));
51500 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51501 assert_eq_m256i(r, e);
51502 }
51503
51504 #[simd_test(enable = "avx512f,avx512vl")]
51505 unsafe fn test_mm256_maskz_loadu_epi32() {
51506 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
51507 let p = a.as_ptr();
51508 let m = 0b11001010;
51509 let r = _mm256_maskz_loadu_epi32(m, black_box(p));
51510 let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
51511 assert_eq_m256i(r, e);
51512 }
51513
51514 #[simd_test(enable = "avx512f,avx512vl")]
51515 unsafe fn test_mm256_mask_load_epi32() {
51516 #[repr(align(32))]
51517 struct Align {
51518 data: [i32; 8], // 32 bytes
51519 }
51520 let src = _mm256_set1_epi32(42);
51521 let a = Align {
51522 data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
51523 };
51524 let p = a.data.as_ptr();
51525 let m = 0b11001010;
51526 let r = _mm256_mask_load_epi32(src, m, black_box(p));
51527 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51528 assert_eq_m256i(r, e);
51529 }
51530
51531 #[simd_test(enable = "avx512f,avx512vl")]
51532 unsafe fn test_mm256_maskz_load_epi32() {
51533 #[repr(align(32))]
51534 struct Align {
51535 data: [i32; 8], // 32 bytes
51536 }
51537 let a = Align {
51538 data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
51539 };
51540 let p = a.data.as_ptr();
51541 let m = 0b11001010;
51542 let r = _mm256_maskz_load_epi32(m, black_box(p));
51543 let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
51544 assert_eq_m256i(r, e);
51545 }
51546
51547 #[simd_test(enable = "avx512f,avx512vl")]
51548 unsafe fn test_mm256_mask_storeu_epi32() {
51549 let mut r = [42_i32; 8];
51550 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
51551 let m = 0b11001010;
51552 _mm256_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51553 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51554 assert_eq_m256i(_mm256_loadu_epi32(r.as_ptr()), e);
51555 }
51556
51557 #[simd_test(enable = "avx512f,avx512vl")]
51558 unsafe fn test_mm256_mask_store_epi32() {
51559 #[repr(align(32))]
51560 struct Align {
51561 data: [i32; 8],
51562 }
51563 let mut r = Align { data: [42; 8] };
51564 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
51565 let m = 0b11001010;
51566 _mm256_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51567 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51568 assert_eq_m256i(_mm256_load_epi32(r.data.as_ptr()), e);
51569 }
51570
51571 #[simd_test(enable = "avx512f,avx512vl")]
51572 unsafe fn test_mm256_mask_loadu_epi64() {
51573 let src = _mm256_set1_epi64x(42);
51574 let a = &[1_i64, 2, 3, 4];
51575 let p = a.as_ptr();
51576 let m = 0b1010;
51577 let r = _mm256_mask_loadu_epi64(src, m, black_box(p));
51578 let e = _mm256_setr_epi64x(42, 2, 42, 4);
51579 assert_eq_m256i(r, e);
51580 }
51581
51582 #[simd_test(enable = "avx512f,avx512vl")]
51583 unsafe fn test_mm256_maskz_loadu_epi64() {
51584 let a = &[1_i64, 2, 3, 4];
51585 let p = a.as_ptr();
51586 let m = 0b1010;
51587 let r = _mm256_maskz_loadu_epi64(m, black_box(p));
51588 let e = _mm256_setr_epi64x(0, 2, 0, 4);
51589 assert_eq_m256i(r, e);
51590 }
51591
51592 #[simd_test(enable = "avx512f,avx512vl")]
51593 unsafe fn test_mm256_mask_load_epi64() {
51594 #[repr(align(32))]
51595 struct Align {
51596 data: [i64; 4], // 32 bytes
51597 }
51598 let src = _mm256_set1_epi64x(42);
51599 let a = Align {
51600 data: [1_i64, 2, 3, 4],
51601 };
51602 let p = a.data.as_ptr();
51603 let m = 0b1010;
51604 let r = _mm256_mask_load_epi64(src, m, black_box(p));
51605 let e = _mm256_setr_epi64x(42, 2, 42, 4);
51606 assert_eq_m256i(r, e);
51607 }
51608
51609 #[simd_test(enable = "avx512f,avx512vl")]
51610 unsafe fn test_mm256_maskz_load_epi64() {
51611 #[repr(align(32))]
51612 struct Align {
51613 data: [i64; 4], // 32 bytes
51614 }
51615 let a = Align {
51616 data: [1_i64, 2, 3, 4],
51617 };
51618 let p = a.data.as_ptr();
51619 let m = 0b1010;
51620 let r = _mm256_maskz_load_epi64(m, black_box(p));
51621 let e = _mm256_setr_epi64x(0, 2, 0, 4);
51622 assert_eq_m256i(r, e);
51623 }
51624
51625 #[simd_test(enable = "avx512f,avx512vl")]
51626 unsafe fn test_mm256_mask_storeu_epi64() {
51627 let mut r = [42_i64; 4];
51628 let a = _mm256_setr_epi64x(1, 2, 3, 4);
51629 let m = 0b1010;
51630 _mm256_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51631 let e = _mm256_setr_epi64x(42, 2, 42, 4);
51632 assert_eq_m256i(_mm256_loadu_epi64(r.as_ptr()), e);
51633 }
51634
51635 #[simd_test(enable = "avx512f,avx512vl")]
51636 unsafe fn test_mm256_mask_store_epi64() {
51637 #[repr(align(32))]
51638 struct Align {
51639 data: [i64; 4],
51640 }
51641 let mut r = Align { data: [42; 4] };
51642 let a = _mm256_setr_epi64x(1, 2, 3, 4);
51643 let m = 0b1010;
51644 _mm256_mask_store_epi64(r.data.as_mut_ptr(), m, a);
51645 let e = _mm256_setr_epi64x(42, 2, 42, 4);
51646 assert_eq_m256i(_mm256_load_epi64(r.data.as_ptr()), e);
51647 }
51648
51649 #[simd_test(enable = "avx512f,avx512vl")]
51650 unsafe fn test_mm256_mask_loadu_ps() {
51651 let src = _mm256_set1_ps(42.0);
51652 let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51653 let p = a.as_ptr();
51654 let m = 0b11001010;
51655 let r = _mm256_mask_loadu_ps(src, m, black_box(p));
51656 let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51657 assert_eq_m256(r, e);
51658 }
51659
51660 #[simd_test(enable = "avx512f,avx512vl")]
51661 unsafe fn test_mm256_maskz_loadu_ps() {
51662 let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51663 let p = a.as_ptr();
51664 let m = 0b11001010;
51665 let r = _mm256_maskz_loadu_ps(m, black_box(p));
51666 let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51667 assert_eq_m256(r, e);
51668 }
51669
51670 #[simd_test(enable = "avx512f,avx512vl")]
51671 unsafe fn test_mm256_mask_load_ps() {
51672 #[repr(align(32))]
51673 struct Align {
51674 data: [f32; 8], // 32 bytes
51675 }
51676 let src = _mm256_set1_ps(42.0);
51677 let a = Align {
51678 data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51679 };
51680 let p = a.data.as_ptr();
51681 let m = 0b11001010;
51682 let r = _mm256_mask_load_ps(src, m, black_box(p));
51683 let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51684 assert_eq_m256(r, e);
51685 }
51686
51687 #[simd_test(enable = "avx512f,avx512vl")]
51688 unsafe fn test_mm256_maskz_load_ps() {
51689 #[repr(align(32))]
51690 struct Align {
51691 data: [f32; 8], // 32 bytes
51692 }
51693 let a = Align {
51694 data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51695 };
51696 let p = a.data.as_ptr();
51697 let m = 0b11001010;
51698 let r = _mm256_maskz_load_ps(m, black_box(p));
51699 let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51700 assert_eq_m256(r, e);
51701 }
51702
51703 #[simd_test(enable = "avx512f,avx512vl")]
51704 unsafe fn test_mm256_mask_storeu_ps() {
51705 let mut r = [42_f32; 8];
51706 let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51707 let m = 0b11001010;
51708 _mm256_mask_storeu_ps(r.as_mut_ptr(), m, a);
51709 let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51710 assert_eq_m256(_mm256_loadu_ps(r.as_ptr()), e);
51711 }
51712
51713 #[simd_test(enable = "avx512f,avx512vl")]
51714 unsafe fn test_mm256_mask_store_ps() {
51715 #[repr(align(32))]
51716 struct Align {
51717 data: [f32; 8],
51718 }
51719 let mut r = Align { data: [42.0; 8] };
51720 let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51721 let m = 0b11001010;
51722 _mm256_mask_store_ps(r.data.as_mut_ptr(), m, a);
51723 let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51724 assert_eq_m256(_mm256_load_ps(r.data.as_ptr()), e);
51725 }
51726
51727 #[simd_test(enable = "avx512f,avx512vl")]
51728 unsafe fn test_mm256_mask_loadu_pd() {
51729 let src = _mm256_set1_pd(42.0);
51730 let a = &[1.0_f64, 2.0, 3.0, 4.0];
51731 let p = a.as_ptr();
51732 let m = 0b1010;
51733 let r = _mm256_mask_loadu_pd(src, m, black_box(p));
51734 let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51735 assert_eq_m256d(r, e);
51736 }
51737
51738 #[simd_test(enable = "avx512f,avx512vl")]
51739 unsafe fn test_mm256_maskz_loadu_pd() {
51740 let a = &[1.0_f64, 2.0, 3.0, 4.0];
51741 let p = a.as_ptr();
51742 let m = 0b1010;
51743 let r = _mm256_maskz_loadu_pd(m, black_box(p));
51744 let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
51745 assert_eq_m256d(r, e);
51746 }
51747
51748 #[simd_test(enable = "avx512f,avx512vl")]
51749 unsafe fn test_mm256_mask_load_pd() {
51750 #[repr(align(32))]
51751 struct Align {
51752 data: [f64; 4], // 32 bytes
51753 }
51754 let src = _mm256_set1_pd(42.0);
51755 let a = Align {
51756 data: [1.0_f64, 2.0, 3.0, 4.0],
51757 };
51758 let p = a.data.as_ptr();
51759 let m = 0b1010;
51760 let r = _mm256_mask_load_pd(src, m, black_box(p));
51761 let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51762 assert_eq_m256d(r, e);
51763 }
51764
51765 #[simd_test(enable = "avx512f,avx512vl")]
51766 unsafe fn test_mm256_maskz_load_pd() {
51767 #[repr(align(32))]
51768 struct Align {
51769 data: [f64; 4], // 32 bytes
51770 }
51771 let a = Align {
51772 data: [1.0_f64, 2.0, 3.0, 4.0],
51773 };
51774 let p = a.data.as_ptr();
51775 let m = 0b1010;
51776 let r = _mm256_maskz_load_pd(m, black_box(p));
51777 let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
51778 assert_eq_m256d(r, e);
51779 }
51780
51781 #[simd_test(enable = "avx512f,avx512vl")]
51782 unsafe fn test_mm256_mask_storeu_pd() {
51783 let mut r = [42_f64; 4];
51784 let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
51785 let m = 0b1010;
51786 _mm256_mask_storeu_pd(r.as_mut_ptr(), m, a);
51787 let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51788 assert_eq_m256d(_mm256_loadu_pd(r.as_ptr()), e);
51789 }
51790
51791 #[simd_test(enable = "avx512f,avx512vl")]
51792 unsafe fn test_mm256_mask_store_pd() {
51793 #[repr(align(32))]
51794 struct Align {
51795 data: [f64; 4],
51796 }
51797 let mut r = Align { data: [42.0; 4] };
51798 let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
51799 let m = 0b1010;
51800 _mm256_mask_store_pd(r.data.as_mut_ptr(), m, a);
51801 let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51802 assert_eq_m256d(_mm256_load_pd(r.data.as_ptr()), e);
51803 }
51804
51805 #[simd_test(enable = "avx512f,avx512vl")]
51806 unsafe fn test_mm_mask_loadu_epi32() {
51807 let src = _mm_set1_epi32(42);
51808 let a = &[1_i32, 2, 3, 4];
51809 let p = a.as_ptr();
51810 let m = 0b1010;
51811 let r = _mm_mask_loadu_epi32(src, m, black_box(p));
51812 let e = _mm_setr_epi32(42, 2, 42, 4);
51813 assert_eq_m128i(r, e);
51814 }
51815
51816 #[simd_test(enable = "avx512f,avx512vl")]
51817 unsafe fn test_mm_maskz_loadu_epi32() {
51818 let a = &[1_i32, 2, 3, 4];
51819 let p = a.as_ptr();
51820 let m = 0b1010;
51821 let r = _mm_maskz_loadu_epi32(m, black_box(p));
51822 let e = _mm_setr_epi32(0, 2, 0, 4);
51823 assert_eq_m128i(r, e);
51824 }
51825
51826 #[simd_test(enable = "avx512f,avx512vl")]
51827 unsafe fn test_mm_mask_load_epi32() {
51828 #[repr(align(16))]
51829 struct Align {
51830 data: [i32; 4], // 16 bytes
51831 }
51832 let src = _mm_set1_epi32(42);
51833 let a = Align {
51834 data: [1_i32, 2, 3, 4],
51835 };
51836 let p = a.data.as_ptr();
51837 let m = 0b1010;
51838 let r = _mm_mask_load_epi32(src, m, black_box(p));
51839 let e = _mm_setr_epi32(42, 2, 42, 4);
51840 assert_eq_m128i(r, e);
51841 }
51842
51843 #[simd_test(enable = "avx512f,avx512vl")]
51844 unsafe fn test_mm_maskz_load_epi32() {
51845 #[repr(align(16))]
51846 struct Align {
51847 data: [i32; 4], // 16 bytes
51848 }
51849 let a = Align {
51850 data: [1_i32, 2, 3, 4],
51851 };
51852 let p = a.data.as_ptr();
51853 let m = 0b1010;
51854 let r = _mm_maskz_load_epi32(m, black_box(p));
51855 let e = _mm_setr_epi32(0, 2, 0, 4);
51856 assert_eq_m128i(r, e);
51857 }
51858
51859 #[simd_test(enable = "avx512f,avx512vl")]
51860 unsafe fn test_mm_mask_storeu_epi32() {
51861 let mut r = [42_i32; 4];
51862 let a = _mm_setr_epi32(1, 2, 3, 4);
51863 let m = 0b1010;
51864 _mm_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51865 let e = _mm_setr_epi32(42, 2, 42, 4);
51866 assert_eq_m128i(_mm_loadu_epi32(r.as_ptr()), e);
51867 }
51868
51869 #[simd_test(enable = "avx512f,avx512vl")]
51870 unsafe fn test_mm_mask_store_epi32() {
51871 #[repr(align(16))]
51872 struct Align {
51873 data: [i32; 4], // 16 bytes
51874 }
51875 let mut r = Align { data: [42; 4] };
51876 let a = _mm_setr_epi32(1, 2, 3, 4);
51877 let m = 0b1010;
51878 _mm_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51879 let e = _mm_setr_epi32(42, 2, 42, 4);
51880 assert_eq_m128i(_mm_load_epi32(r.data.as_ptr()), e);
51881 }
51882
51883 #[simd_test(enable = "avx512f,avx512vl")]
51884 unsafe fn test_mm_mask_loadu_epi64() {
51885 let src = _mm_set1_epi64x(42);
51886 let a = &[1_i64, 2];
51887 let p = a.as_ptr();
51888 let m = 0b10;
51889 let r = _mm_mask_loadu_epi64(src, m, black_box(p));
51890 let e = _mm_setr_epi64x(42, 2);
51891 assert_eq_m128i(r, e);
51892 }
51893
51894 #[simd_test(enable = "avx512f,avx512vl")]
51895 unsafe fn test_mm_maskz_loadu_epi64() {
51896 let a = &[1_i64, 2];
51897 let p = a.as_ptr();
51898 let m = 0b10;
51899 let r = _mm_maskz_loadu_epi64(m, black_box(p));
51900 let e = _mm_setr_epi64x(0, 2);
51901 assert_eq_m128i(r, e);
51902 }
51903
51904 #[simd_test(enable = "avx512f,avx512vl")]
51905 unsafe fn test_mm_mask_load_epi64() {
51906 #[repr(align(16))]
51907 struct Align {
51908 data: [i64; 2], // 16 bytes
51909 }
51910 let src = _mm_set1_epi64x(42);
51911 let a = Align { data: [1_i64, 2] };
51912 let p = a.data.as_ptr();
51913 let m = 0b10;
51914 let r = _mm_mask_load_epi64(src, m, black_box(p));
51915 let e = _mm_setr_epi64x(42, 2);
51916 assert_eq_m128i(r, e);
51917 }
51918
51919 #[simd_test(enable = "avx512f,avx512vl")]
51920 unsafe fn test_mm_maskz_load_epi64() {
51921 #[repr(align(16))]
51922 struct Align {
51923 data: [i64; 2], // 16 bytes
51924 }
51925 let a = Align { data: [1_i64, 2] };
51926 let p = a.data.as_ptr();
51927 let m = 0b10;
51928 let r = _mm_maskz_load_epi64(m, black_box(p));
51929 let e = _mm_setr_epi64x(0, 2);
51930 assert_eq_m128i(r, e);
51931 }
51932
51933 #[simd_test(enable = "avx512f,avx512vl")]
51934 unsafe fn test_mm_mask_storeu_epi64() {
51935 let mut r = [42_i64; 2];
51936 let a = _mm_setr_epi64x(1, 2);
51937 let m = 0b10;
51938 _mm_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51939 let e = _mm_setr_epi64x(42, 2);
51940 assert_eq_m128i(_mm_loadu_epi64(r.as_ptr()), e);
51941 }
51942
51943 #[simd_test(enable = "avx512f,avx512vl")]
51944 unsafe fn test_mm_mask_store_epi64() {
51945 #[repr(align(16))]
51946 struct Align {
51947 data: [i64; 2], // 16 bytes
51948 }
51949 let mut r = Align { data: [42; 2] };
51950 let a = _mm_setr_epi64x(1, 2);
51951 let m = 0b10;
51952 _mm_mask_store_epi64(r.data.as_mut_ptr(), m, a);
51953 let e = _mm_setr_epi64x(42, 2);
51954 assert_eq_m128i(_mm_load_epi64(r.data.as_ptr()), e);
51955 }
51956
51957 #[simd_test(enable = "avx512f,avx512vl")]
51958 unsafe fn test_mm_mask_loadu_ps() {
51959 let src = _mm_set1_ps(42.0);
51960 let a = &[1.0_f32, 2.0, 3.0, 4.0];
51961 let p = a.as_ptr();
51962 let m = 0b1010;
51963 let r = _mm_mask_loadu_ps(src, m, black_box(p));
51964 let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51965 assert_eq_m128(r, e);
51966 }
51967
51968 #[simd_test(enable = "avx512f,avx512vl")]
51969 unsafe fn test_mm_maskz_loadu_ps() {
51970 let a = &[1.0_f32, 2.0, 3.0, 4.0];
51971 let p = a.as_ptr();
51972 let m = 0b1010;
51973 let r = _mm_maskz_loadu_ps(m, black_box(p));
51974 let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
51975 assert_eq_m128(r, e);
51976 }
51977
51978 #[simd_test(enable = "avx512f,avx512vl")]
51979 unsafe fn test_mm_mask_load_ps() {
51980 #[repr(align(16))]
51981 struct Align {
51982 data: [f32; 4], // 16 bytes
51983 }
51984 let src = _mm_set1_ps(42.0);
51985 let a = Align {
51986 data: [1.0_f32, 2.0, 3.0, 4.0],
51987 };
51988 let p = a.data.as_ptr();
51989 let m = 0b1010;
51990 let r = _mm_mask_load_ps(src, m, black_box(p));
51991 let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51992 assert_eq_m128(r, e);
51993 }
51994
51995 #[simd_test(enable = "avx512f,avx512vl")]
51996 unsafe fn test_mm_maskz_load_ps() {
51997 #[repr(align(16))]
51998 struct Align {
51999 data: [f32; 4], // 16 bytes
52000 }
52001 let a = Align {
52002 data: [1.0_f32, 2.0, 3.0, 4.0],
52003 };
52004 let p = a.data.as_ptr();
52005 let m = 0b1010;
52006 let r = _mm_maskz_load_ps(m, black_box(p));
52007 let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
52008 assert_eq_m128(r, e);
52009 }
52010
52011 #[simd_test(enable = "avx512f,avx512vl")]
52012 unsafe fn test_mm_mask_storeu_ps() {
52013 let mut r = [42_f32; 4];
52014 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
52015 let m = 0b1010;
52016 _mm_mask_storeu_ps(r.as_mut_ptr(), m, a);
52017 let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
52018 assert_eq_m128(_mm_loadu_ps(r.as_ptr()), e);
52019 }
52020
52021 #[simd_test(enable = "avx512f,avx512vl")]
52022 unsafe fn test_mm_mask_store_ps() {
52023 #[repr(align(16))]
52024 struct Align {
52025 data: [f32; 4], // 16 bytes
52026 }
52027 let mut r = Align { data: [42.0; 4] };
52028 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
52029 let m = 0b1010;
52030 _mm_mask_store_ps(r.data.as_mut_ptr(), m, a);
52031 let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
52032 assert_eq_m128(_mm_load_ps(r.data.as_ptr()), e);
52033 }
52034
52035 #[simd_test(enable = "avx512f,avx512vl")]
52036 unsafe fn test_mm_mask_loadu_pd() {
52037 let src = _mm_set1_pd(42.0);
52038 let a = &[1.0_f64, 2.0];
52039 let p = a.as_ptr();
52040 let m = 0b10;
52041 let r = _mm_mask_loadu_pd(src, m, black_box(p));
52042 let e = _mm_setr_pd(42.0, 2.0);
52043 assert_eq_m128d(r, e);
52044 }
52045
52046 #[simd_test(enable = "avx512f,avx512vl")]
52047 unsafe fn test_mm_maskz_loadu_pd() {
52048 let a = &[1.0_f64, 2.0];
52049 let p = a.as_ptr();
52050 let m = 0b10;
52051 let r = _mm_maskz_loadu_pd(m, black_box(p));
52052 let e = _mm_setr_pd(0.0, 2.0);
52053 assert_eq_m128d(r, e);
52054 }
52055
52056 #[simd_test(enable = "avx512f,avx512vl")]
52057 unsafe fn test_mm_mask_load_pd() {
52058 #[repr(align(16))]
52059 struct Align {
52060 data: [f64; 2], // 16 bytes
52061 }
52062 let src = _mm_set1_pd(42.0);
52063 let a = Align {
52064 data: [1.0_f64, 2.0],
52065 };
52066 let p = a.data.as_ptr();
52067 let m = 0b10;
52068 let r = _mm_mask_load_pd(src, m, black_box(p));
52069 let e = _mm_setr_pd(42.0, 2.0);
52070 assert_eq_m128d(r, e);
52071 }
52072
52073 #[simd_test(enable = "avx512f,avx512vl")]
52074 unsafe fn test_mm_maskz_load_pd() {
52075 #[repr(align(16))]
52076 struct Align {
52077 data: [f64; 2], // 16 bytes
52078 }
52079 let a = Align {
52080 data: [1.0_f64, 2.0],
52081 };
52082 let p = a.data.as_ptr();
52083 let m = 0b10;
52084 let r = _mm_maskz_load_pd(m, black_box(p));
52085 let e = _mm_setr_pd(0.0, 2.0);
52086 assert_eq_m128d(r, e);
52087 }
52088
52089 #[simd_test(enable = "avx512f")]
52090 unsafe fn test_mm_mask_load_ss() {
52091 #[repr(align(16))]
52092 struct Align {
52093 data: f32,
52094 }
52095 let src = _mm_set_ss(2.0);
52096 let mem = Align { data: 1.0 };
52097 let r = _mm_mask_load_ss(src, 0b1, &mem.data);
52098 assert_eq_m128(r, _mm_set_ss(1.0));
52099 let r = _mm_mask_load_ss(src, 0b0, &mem.data);
52100 assert_eq_m128(r, _mm_set_ss(2.0));
52101 }
52102
52103 #[simd_test(enable = "avx512f")]
52104 unsafe fn test_mm_maskz_load_ss() {
52105 #[repr(align(16))]
52106 struct Align {
52107 data: f32,
52108 }
52109 let mem = Align { data: 1.0 };
52110 let r = _mm_maskz_load_ss(0b1, &mem.data);
52111 assert_eq_m128(r, _mm_set_ss(1.0));
52112 let r = _mm_maskz_load_ss(0b0, &mem.data);
52113 assert_eq_m128(r, _mm_set_ss(0.0));
52114 }
52115
52116 #[simd_test(enable = "avx512f")]
52117 unsafe fn test_mm_mask_load_sd() {
52118 #[repr(align(16))]
52119 struct Align {
52120 data: f64,
52121 }
52122 let src = _mm_set_sd(2.0);
52123 let mem = Align { data: 1.0 };
52124 let r = _mm_mask_load_sd(src, 0b1, &mem.data);
52125 assert_eq_m128d(r, _mm_set_sd(1.0));
52126 let r = _mm_mask_load_sd(src, 0b0, &mem.data);
52127 assert_eq_m128d(r, _mm_set_sd(2.0));
52128 }
52129
52130 #[simd_test(enable = "avx512f")]
52131 unsafe fn test_mm_maskz_load_sd() {
52132 #[repr(align(16))]
52133 struct Align {
52134 data: f64,
52135 }
52136 let mem = Align { data: 1.0 };
52137 let r = _mm_maskz_load_sd(0b1, &mem.data);
52138 assert_eq_m128d(r, _mm_set_sd(1.0));
52139 let r = _mm_maskz_load_sd(0b0, &mem.data);
52140 assert_eq_m128d(r, _mm_set_sd(0.0));
52141 }
52142
52143 #[simd_test(enable = "avx512f,avx512vl")]
52144 unsafe fn test_mm_mask_storeu_pd() {
52145 let mut r = [42_f64; 2];
52146 let a = _mm_setr_pd(1.0, 2.0);
52147 let m = 0b10;
52148 _mm_mask_storeu_pd(r.as_mut_ptr(), m, a);
52149 let e = _mm_setr_pd(42.0, 2.0);
52150 assert_eq_m128d(_mm_loadu_pd(r.as_ptr()), e);
52151 }
52152
52153 #[simd_test(enable = "avx512f,avx512vl")]
52154 unsafe fn test_mm_mask_store_pd() {
52155 #[repr(align(16))]
52156 struct Align {
52157 data: [f64; 2], // 16 bytes
52158 }
52159 let mut r = Align { data: [42.0; 2] };
52160 let a = _mm_setr_pd(1.0, 2.0);
52161 let m = 0b10;
52162 _mm_mask_store_pd(r.data.as_mut_ptr(), m, a);
52163 let e = _mm_setr_pd(42.0, 2.0);
52164 assert_eq_m128d(_mm_load_pd(r.data.as_ptr()), e);
52165 }
52166
52167 #[simd_test(enable = "avx512f")]
52168 unsafe fn test_mm_mask_store_ss() {
52169 #[repr(align(16))]
52170 struct Align {
52171 data: f32,
52172 }
52173 let a = _mm_set_ss(2.0);
52174 let mut mem = Align { data: 1.0 };
52175 _mm_mask_store_ss(&mut mem.data, 0b1, a);
52176 assert_eq!(mem.data, 2.0);
52177 _mm_mask_store_ss(&mut mem.data, 0b0, a);
52178 assert_eq!(mem.data, 2.0);
52179 }
52180
52181 #[simd_test(enable = "avx512f")]
52182 unsafe fn test_mm_mask_store_sd() {
52183 #[repr(align(16))]
52184 struct Align {
52185 data: f64,
52186 }
52187 let a = _mm_set_sd(2.0);
52188 let mut mem = Align { data: 1.0 };
52189 _mm_mask_store_sd(&mut mem.data, 0b1, a);
52190 assert_eq!(mem.data, 2.0);
52191 _mm_mask_store_sd(&mut mem.data, 0b0, a);
52192 assert_eq!(mem.data, 2.0);
52193 }
52194
52195 #[simd_test(enable = "avx512f")]
52196 unsafe fn test_mm512_setr_pd() {
52197 let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
52198 assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
52199 }
52200
52201 #[simd_test(enable = "avx512f")]
52202 unsafe fn test_mm512_set_pd() {
52203 let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
52204 assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
52205 }
52206
52207 #[simd_test(enable = "avx512f")]
52208 unsafe fn test_mm512_rol_epi32() {
52209 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
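// Rotating left by one wraps the element set to 1 << 31 around to 1; the elements set to 1 become 2.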
52210 let r = _mm512_rol_epi32::<1>(a);
52211 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52212 assert_eq_m512i(r, e);
52213 }
52214
52215 #[simd_test(enable = "avx512f")]
52216 unsafe fn test_mm512_mask_rol_epi32() {
52217 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52218 let r = _mm512_mask_rol_epi32::<1>(a, 0, a);
52219 assert_eq_m512i(r, a);
52220 let r = _mm512_mask_rol_epi32::<1>(a, 0b11111111_11111111, a);
52221 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52222 assert_eq_m512i(r, e);
52223 }
52224
52225 #[simd_test(enable = "avx512f")]
52226 unsafe fn test_mm512_maskz_rol_epi32() {
52227 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52228 let r = _mm512_maskz_rol_epi32::<1>(0, a);
52229 assert_eq_m512i(r, _mm512_setzero_si512());
52230 let r = _mm512_maskz_rol_epi32::<1>(0b00000000_11111111, a);
52231 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52232 assert_eq_m512i(r, e);
52233 }
52234
52235 #[simd_test(enable = "avx512f,avx512vl")]
52236 unsafe fn test_mm256_rol_epi32() {
52237 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52238 let r = _mm256_rol_epi32::<1>(a);
52239 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52240 assert_eq_m256i(r, e);
52241 }
52242
52243 #[simd_test(enable = "avx512f,avx512vl")]
52244 unsafe fn test_mm256_mask_rol_epi32() {
52245 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52246 let r = _mm256_mask_rol_epi32::<1>(a, 0, a);
52247 assert_eq_m256i(r, a);
52248 let r = _mm256_mask_rol_epi32::<1>(a, 0b11111111, a);
52249 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52250 assert_eq_m256i(r, e);
52251 }
52252
52253 #[simd_test(enable = "avx512f,avx512vl")]
52254 unsafe fn test_mm256_maskz_rol_epi32() {
52255 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52256 let r = _mm256_maskz_rol_epi32::<1>(0, a);
52257 assert_eq_m256i(r, _mm256_setzero_si256());
52258 let r = _mm256_maskz_rol_epi32::<1>(0b11111111, a);
52259 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52260 assert_eq_m256i(r, e);
52261 }
52262
52263 #[simd_test(enable = "avx512f,avx512vl")]
52264 unsafe fn test_mm_rol_epi32() {
52265 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52266 let r = _mm_rol_epi32::<1>(a);
52267 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52268 assert_eq_m128i(r, e);
52269 }
52270
52271 #[simd_test(enable = "avx512f,avx512vl")]
52272 unsafe fn test_mm_mask_rol_epi32() {
52273 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52274 let r = _mm_mask_rol_epi32::<1>(a, 0, a);
52275 assert_eq_m128i(r, a);
52276 let r = _mm_mask_rol_epi32::<1>(a, 0b00001111, a);
52277 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52278 assert_eq_m128i(r, e);
52279 }
52280
52281 #[simd_test(enable = "avx512f,avx512vl")]
52282 unsafe fn test_mm_maskz_rol_epi32() {
52283 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52284 let r = _mm_maskz_rol_epi32::<1>(0, a);
52285 assert_eq_m128i(r, _mm_setzero_si128());
52286 let r = _mm_maskz_rol_epi32::<1>(0b00001111, a);
52287 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52288 assert_eq_m128i(r, e);
52289 }
52290
52291 #[simd_test(enable = "avx512f")]
52292 unsafe fn test_mm512_ror_epi32() {
52293 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52294 let r = _mm512_ror_epi32::<1>(a);
52295 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52296 assert_eq_m512i(r, e);
52297 }
52298
52299 #[simd_test(enable = "avx512f")]
52300 unsafe fn test_mm512_mask_ror_epi32() {
52301 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52302 let r = _mm512_mask_ror_epi32::<1>(a, 0, a);
52303 assert_eq_m512i(r, a);
52304 let r = _mm512_mask_ror_epi32::<1>(a, 0b11111111_11111111, a);
52305 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52306 assert_eq_m512i(r, e);
52307 }
52308
52309 #[simd_test(enable = "avx512f")]
52310 unsafe fn test_mm512_maskz_ror_epi32() {
52311 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52312 let r = _mm512_maskz_ror_epi32::<1>(0, a);
52313 assert_eq_m512i(r, _mm512_setzero_si512());
52314 let r = _mm512_maskz_ror_epi32::<1>(0b00000000_11111111, a);
52315 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52316 assert_eq_m512i(r, e);
52317 }
52318
52319 #[simd_test(enable = "avx512f,avx512vl")]
52320 unsafe fn test_mm256_ror_epi32() {
52321 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52322 let r = _mm256_ror_epi32::<1>(a);
52323 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52324 assert_eq_m256i(r, e);
52325 }
52326
52327 #[simd_test(enable = "avx512f,avx512vl")]
52328 unsafe fn test_mm256_mask_ror_epi32() {
52329 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52330 let r = _mm256_mask_ror_epi32::<1>(a, 0, a);
52331 assert_eq_m256i(r, a);
52332 let r = _mm256_mask_ror_epi32::<1>(a, 0b11111111, a);
52333 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52334 assert_eq_m256i(r, e);
52335 }
52336
52337 #[simd_test(enable = "avx512f,avx512vl")]
52338 unsafe fn test_mm256_maskz_ror_epi32() {
52339 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52340 let r = _mm256_maskz_ror_epi32::<1>(0, a);
52341 assert_eq_m256i(r, _mm256_setzero_si256());
52342 let r = _mm256_maskz_ror_epi32::<1>(0b11111111, a);
52343 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52344 assert_eq_m256i(r, e);
52345 }
52346
52347 #[simd_test(enable = "avx512f,avx512vl")]
52348 unsafe fn test_mm_ror_epi32() {
52349 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52350 let r = _mm_ror_epi32::<1>(a);
52351 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52352 assert_eq_m128i(r, e);
52353 }
52354
52355 #[simd_test(enable = "avx512f,avx512vl")]
52356 unsafe fn test_mm_mask_ror_epi32() {
52357 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52358 let r = _mm_mask_ror_epi32::<1>(a, 0, a);
52359 assert_eq_m128i(r, a);
52360 let r = _mm_mask_ror_epi32::<1>(a, 0b00001111, a);
52361 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52362 assert_eq_m128i(r, e);
52363 }
52364
52365 #[simd_test(enable = "avx512f,avx512vl")]
52366 unsafe fn test_mm_maskz_ror_epi32() {
52367 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52368 let r = _mm_maskz_ror_epi32::<1>(0, a);
52369 assert_eq_m128i(r, _mm_setzero_si128());
52370 let r = _mm_maskz_ror_epi32::<1>(0b00001111, a);
52371 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52372 assert_eq_m128i(r, e);
52373 }
52374
52375 #[simd_test(enable = "avx512f")]
52376 unsafe fn test_mm512_slli_epi32() {
52377 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
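// Unlike a rotate, the logical left shift discards the top bit, so the element set to 1 << 31 becomes 0.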
52378 let r = _mm512_slli_epi32::<1>(a);
52379 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52380 assert_eq_m512i(r, e);
52381 }
52382
52383 #[simd_test(enable = "avx512f")]
52384 unsafe fn test_mm512_mask_slli_epi32() {
52385 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52386 let r = _mm512_mask_slli_epi32::<1>(a, 0, a);
52387 assert_eq_m512i(r, a);
52388 let r = _mm512_mask_slli_epi32::<1>(a, 0b11111111_11111111, a);
52389 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52390 assert_eq_m512i(r, e);
52391 }
52392
52393 #[simd_test(enable = "avx512f")]
52394 unsafe fn test_mm512_maskz_slli_epi32() {
52395 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52396 let r = _mm512_maskz_slli_epi32::<1>(0, a);
52397 assert_eq_m512i(r, _mm512_setzero_si512());
52398 let r = _mm512_maskz_slli_epi32::<1>(0b00000000_11111111, a);
52399 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
52400 assert_eq_m512i(r, e);
52401 }
52402
52403 #[simd_test(enable = "avx512f,avx512vl")]
52404 unsafe fn test_mm256_mask_slli_epi32() {
52405 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52406 let r = _mm256_mask_slli_epi32::<1>(a, 0, a);
52407 assert_eq_m256i(r, a);
52408 let r = _mm256_mask_slli_epi32::<1>(a, 0b11111111, a);
52409 let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52410 assert_eq_m256i(r, e);
52411 }
52412
52413 #[simd_test(enable = "avx512f,avx512vl")]
52414 unsafe fn test_mm256_maskz_slli_epi32() {
52415 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52416 let r = _mm256_maskz_slli_epi32::<1>(0, a);
52417 assert_eq_m256i(r, _mm256_setzero_si256());
52418 let r = _mm256_maskz_slli_epi32::<1>(0b11111111, a);
52419 let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52420 assert_eq_m256i(r, e);
52421 }
52422
52423 #[simd_test(enable = "avx512f,avx512vl")]
52424 unsafe fn test_mm_mask_slli_epi32() {
52425 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52426 let r = _mm_mask_slli_epi32::<1>(a, 0, a);
52427 assert_eq_m128i(r, a);
52428 let r = _mm_mask_slli_epi32::<1>(a, 0b00001111, a);
52429 let e = _mm_set_epi32(0, 2, 2, 2);
52430 assert_eq_m128i(r, e);
52431 }
52432
52433 #[simd_test(enable = "avx512f,avx512vl")]
52434 unsafe fn test_mm_maskz_slli_epi32() {
52435 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52436 let r = _mm_maskz_slli_epi32::<1>(0, a);
52437 assert_eq_m128i(r, _mm_setzero_si128());
52438 let r = _mm_maskz_slli_epi32::<1>(0b00001111, a);
52439 let e = _mm_set_epi32(0, 2, 2, 2);
52440 assert_eq_m128i(r, e);
52441 }
52442
52443 #[simd_test(enable = "avx512f")]
52444 unsafe fn test_mm512_srli_epi32() {
52445 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52446 let r = _mm512_srli_epi32::<1>(a);
52447 let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52448 assert_eq_m512i(r, e);
52449 }
52450
52451 #[simd_test(enable = "avx512f")]
52452 unsafe fn test_mm512_mask_srli_epi32() {
52453 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52454 let r = _mm512_mask_srli_epi32::<1>(a, 0, a);
52455 assert_eq_m512i(r, a);
52456 let r = _mm512_mask_srli_epi32::<1>(a, 0b11111111_11111111, a);
52457 let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52458 assert_eq_m512i(r, e);
52459 }
52460
52461 #[simd_test(enable = "avx512f")]
52462 unsafe fn test_mm512_maskz_srli_epi32() {
52463 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
52464 let r = _mm512_maskz_srli_epi32::<1>(0, a);
52465 assert_eq_m512i(r, _mm512_setzero_si512());
52466 let r = _mm512_maskz_srli_epi32::<1>(0b00000000_11111111, a);
52467 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31);
52468 assert_eq_m512i(r, e);
52469 }
52470
52471 #[simd_test(enable = "avx512f,avx512vl")]
52472 unsafe fn test_mm256_mask_srli_epi32() {
52473 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52474 let r = _mm256_mask_srli_epi32::<1>(a, 0, a);
52475 assert_eq_m256i(r, a);
52476 let r = _mm256_mask_srli_epi32::<1>(a, 0b11111111, a);
52477 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52478 assert_eq_m256i(r, e);
52479 }
52480
52481 #[simd_test(enable = "avx512f,avx512vl")]
52482 unsafe fn test_mm256_maskz_srli_epi32() {
52483 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52484 let r = _mm256_maskz_srli_epi32::<1>(0, a);
52485 assert_eq_m256i(r, _mm256_setzero_si256());
52486 let r = _mm256_maskz_srli_epi32::<1>(0b11111111, a);
52487 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52488 assert_eq_m256i(r, e);
52489 }
52490
52491 #[simd_test(enable = "avx512f,avx512vl")]
52492 unsafe fn test_mm_mask_srli_epi32() {
52493 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52494 let r = _mm_mask_srli_epi32::<1>(a, 0, a);
52495 assert_eq_m128i(r, a);
52496 let r = _mm_mask_srli_epi32::<1>(a, 0b00001111, a);
52497 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52498 assert_eq_m128i(r, e);
52499 }
52500
52501 #[simd_test(enable = "avx512f,avx512vl")]
52502 unsafe fn test_mm_maskz_srli_epi32() {
52503 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52504 let r = _mm_maskz_srli_epi32::<1>(0, a);
52505 assert_eq_m128i(r, _mm_setzero_si128());
52506 let r = _mm_maskz_srli_epi32::<1>(0b00001111, a);
52507 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52508 assert_eq_m128i(r, e);
52509 }
52510
52511 #[simd_test(enable = "avx512f")]
52512 unsafe fn test_mm512_rolv_epi32() {
52513 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52514 let b = _mm512_set1_epi32(1);
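// rolv rotates each element by the count in the corresponding lane of `b`; here every lane rotates left by 1.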
52515 let r = _mm512_rolv_epi32(a, b);
52516 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52517 assert_eq_m512i(r, e);
52518 }
52519
52520 #[simd_test(enable = "avx512f")]
52521 unsafe fn test_mm512_mask_rolv_epi32() {
52522 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52523 let b = _mm512_set1_epi32(1);
52524 let r = _mm512_mask_rolv_epi32(a, 0, a, b);
52525 assert_eq_m512i(r, a);
52526 let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
52527 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52528 assert_eq_m512i(r, e);
52529 }
52530
52531 #[simd_test(enable = "avx512f")]
52532 unsafe fn test_mm512_maskz_rolv_epi32() {
52533 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52534 let b = _mm512_set1_epi32(1);
52535 let r = _mm512_maskz_rolv_epi32(0, a, b);
52536 assert_eq_m512i(r, _mm512_setzero_si512());
52537 let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
52538 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52539 assert_eq_m512i(r, e);
52540 }
52541
52542 #[simd_test(enable = "avx512f,avx512vl")]
52543 unsafe fn test_mm256_rolv_epi32() {
52544 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52545 let b = _mm256_set1_epi32(1);
52546 let r = _mm256_rolv_epi32(a, b);
52547 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52548 assert_eq_m256i(r, e);
52549 }
52550
52551 #[simd_test(enable = "avx512f,avx512vl")]
52552 unsafe fn test_mm256_mask_rolv_epi32() {
52553 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52554 let b = _mm256_set1_epi32(1);
52555 let r = _mm256_mask_rolv_epi32(a, 0, a, b);
52556 assert_eq_m256i(r, a);
52557 let r = _mm256_mask_rolv_epi32(a, 0b11111111, a, b);
52558 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52559 assert_eq_m256i(r, e);
52560 }
52561
52562 #[simd_test(enable = "avx512f,avx512vl")]
52563 unsafe fn test_mm256_maskz_rolv_epi32() {
52564 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52565 let b = _mm256_set1_epi32(1);
52566 let r = _mm256_maskz_rolv_epi32(0, a, b);
52567 assert_eq_m256i(r, _mm256_setzero_si256());
52568 let r = _mm256_maskz_rolv_epi32(0b11111111, a, b);
52569 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52570 assert_eq_m256i(r, e);
52571 }
52572
52573 #[simd_test(enable = "avx512f,avx512vl")]
52574 unsafe fn test_mm_rolv_epi32() {
52575 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52576 let b = _mm_set1_epi32(1);
52577 let r = _mm_rolv_epi32(a, b);
52578 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52579 assert_eq_m128i(r, e);
52580 }
52581
52582 #[simd_test(enable = "avx512f,avx512vl")]
52583 unsafe fn test_mm_mask_rolv_epi32() {
52584 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52585 let b = _mm_set1_epi32(1);
52586 let r = _mm_mask_rolv_epi32(a, 0, a, b);
52587 assert_eq_m128i(r, a);
52588 let r = _mm_mask_rolv_epi32(a, 0b00001111, a, b);
52589 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52590 assert_eq_m128i(r, e);
52591 }
52592
52593 #[simd_test(enable = "avx512f,avx512vl")]
52594 unsafe fn test_mm_maskz_rolv_epi32() {
52595 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52596 let b = _mm_set1_epi32(1);
52597 let r = _mm_maskz_rolv_epi32(0, a, b);
52598 assert_eq_m128i(r, _mm_setzero_si128());
52599 let r = _mm_maskz_rolv_epi32(0b00001111, a, b);
52600 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52601 assert_eq_m128i(r, e);
52602 }
52603
52604 #[simd_test(enable = "avx512f")]
52605 unsafe fn test_mm512_rorv_epi32() {
52606 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52607 let b = _mm512_set1_epi32(1);
52608 let r = _mm512_rorv_epi32(a, b);
52609 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52610 assert_eq_m512i(r, e);
52611 }
52612
52613 #[simd_test(enable = "avx512f")]
52614 unsafe fn test_mm512_mask_rorv_epi32() {
52615 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52616 let b = _mm512_set1_epi32(1);
52617 let r = _mm512_mask_rorv_epi32(a, 0, a, b);
52618 assert_eq_m512i(r, a);
52619 let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
52620 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52621 assert_eq_m512i(r, e);
52622 }
52623
52624 #[simd_test(enable = "avx512f")]
52625 unsafe fn test_mm512_maskz_rorv_epi32() {
52626 let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52627 let b = _mm512_set1_epi32(1);
52628 let r = _mm512_maskz_rorv_epi32(0, a, b);
52629 assert_eq_m512i(r, _mm512_setzero_si512());
52630 let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
52631 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52632 assert_eq_m512i(r, e);
52633 }
52634
52635 #[simd_test(enable = "avx512f,avx512vl")]
52636 unsafe fn test_mm256_rorv_epi32() {
52637 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52638 let b = _mm256_set1_epi32(1);
52639 let r = _mm256_rorv_epi32(a, b);
52640 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52641 assert_eq_m256i(r, e);
52642 }
52643
52644 #[simd_test(enable = "avx512f,avx512vl")]
52645 unsafe fn test_mm256_mask_rorv_epi32() {
52646 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52647 let b = _mm256_set1_epi32(1);
52648 let r = _mm256_mask_rorv_epi32(a, 0, a, b);
52649 assert_eq_m256i(r, a);
52650 let r = _mm256_mask_rorv_epi32(a, 0b11111111, a, b);
52651 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52652 assert_eq_m256i(r, e);
52653 }
52654
52655 #[simd_test(enable = "avx512f,avx512vl")]
52656 unsafe fn test_mm256_maskz_rorv_epi32() {
52657 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52658 let b = _mm256_set1_epi32(1);
52659 let r = _mm256_maskz_rorv_epi32(0, a, b);
52660 assert_eq_m256i(r, _mm256_setzero_si256());
52661 let r = _mm256_maskz_rorv_epi32(0b11111111, a, b);
52662 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52663 assert_eq_m256i(r, e);
52664 }
52665
52666 #[simd_test(enable = "avx512f,avx512vl")]
52667 unsafe fn test_mm_rorv_epi32() {
52668 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52669 let b = _mm_set1_epi32(1);
52670 let r = _mm_rorv_epi32(a, b);
52671 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52672 assert_eq_m128i(r, e);
52673 }
52674
52675 #[simd_test(enable = "avx512f,avx512vl")]
52676 unsafe fn test_mm_mask_rorv_epi32() {
52677 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52678 let b = _mm_set1_epi32(1);
52679 let r = _mm_mask_rorv_epi32(a, 0, a, b);
52680 assert_eq_m128i(r, a);
52681 let r = _mm_mask_rorv_epi32(a, 0b00001111, a, b);
52682 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52683 assert_eq_m128i(r, e);
52684 }
52685
52686 #[simd_test(enable = "avx512f,avx512vl")]
52687 unsafe fn test_mm_maskz_rorv_epi32() {
52688 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52689 let b = _mm_set1_epi32(1);
52690 let r = _mm_maskz_rorv_epi32(0, a, b);
52691 assert_eq_m128i(r, _mm_setzero_si128());
52692 let r = _mm_maskz_rorv_epi32(0b00001111, a, b);
52693 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52694 assert_eq_m128i(r, e);
52695 }
52696
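    // sllv/srlv shift each element by its own per-element count; bits shifted
    // past the element boundary are discarded, so `(1 << 31) << 1 == 0`.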
52697 #[simd_test(enable = "avx512f")]
52698 unsafe fn test_mm512_sllv_epi32() {
52699 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52700 let count = _mm512_set1_epi32(1);
52701 let r = _mm512_sllv_epi32(a, count);
52702 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52703 assert_eq_m512i(r, e);
52704 }
52705
52706 #[simd_test(enable = "avx512f")]
52707 unsafe fn test_mm512_mask_sllv_epi32() {
52708 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52709 let count = _mm512_set1_epi32(1);
52710 let r = _mm512_mask_sllv_epi32(a, 0, a, count);
52711 assert_eq_m512i(r, a);
52712 let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
52713 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52714 assert_eq_m512i(r, e);
52715 }
52716
52717 #[simd_test(enable = "avx512f")]
52718 unsafe fn test_mm512_maskz_sllv_epi32() {
52719 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52720 let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52721 let r = _mm512_maskz_sllv_epi32(0, a, count);
52722 assert_eq_m512i(r, _mm512_setzero_si512());
52723 let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
52724 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
52725 assert_eq_m512i(r, e);
52726 }
52727
52728 #[simd_test(enable = "avx512f,avx512vl")]
52729 unsafe fn test_mm256_mask_sllv_epi32() {
52730 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52731 let count = _mm256_set1_epi32(1);
52732 let r = _mm256_mask_sllv_epi32(a, 0, a, count);
52733 assert_eq_m256i(r, a);
52734 let r = _mm256_mask_sllv_epi32(a, 0b11111111, a, count);
52735 let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52736 assert_eq_m256i(r, e);
52737 }
52738
52739 #[simd_test(enable = "avx512f,avx512vl")]
52740 unsafe fn test_mm256_maskz_sllv_epi32() {
52741 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52742 let count = _mm256_set1_epi32(1);
52743 let r = _mm256_maskz_sllv_epi32(0, a, count);
52744 assert_eq_m256i(r, _mm256_setzero_si256());
52745 let r = _mm256_maskz_sllv_epi32(0b11111111, a, count);
52746 let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52747 assert_eq_m256i(r, e);
52748 }
52749
52750 #[simd_test(enable = "avx512f,avx512vl")]
52751 unsafe fn test_mm_mask_sllv_epi32() {
52752 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52753 let count = _mm_set1_epi32(1);
52754 let r = _mm_mask_sllv_epi32(a, 0, a, count);
52755 assert_eq_m128i(r, a);
52756 let r = _mm_mask_sllv_epi32(a, 0b00001111, a, count);
52757 let e = _mm_set_epi32(0, 2, 2, 2);
52758 assert_eq_m128i(r, e);
52759 }
52760
52761 #[simd_test(enable = "avx512f,avx512vl")]
52762 unsafe fn test_mm_maskz_sllv_epi32() {
52763 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52764 let count = _mm_set1_epi32(1);
52765 let r = _mm_maskz_sllv_epi32(0, a, count);
52766 assert_eq_m128i(r, _mm_setzero_si128());
52767 let r = _mm_maskz_sllv_epi32(0b00001111, a, count);
52768 let e = _mm_set_epi32(0, 2, 2, 2);
52769 assert_eq_m128i(r, e);
52770 }
52771
52772 #[simd_test(enable = "avx512f")]
52773 unsafe fn test_mm512_srlv_epi32() {
52774 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52775 let count = _mm512_set1_epi32(1);
52776 let r = _mm512_srlv_epi32(a, count);
52777 let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52778 assert_eq_m512i(r, e);
52779 }
52780
52781 #[simd_test(enable = "avx512f")]
52782 unsafe fn test_mm512_mask_srlv_epi32() {
52783 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52784 let count = _mm512_set1_epi32(1);
52785 let r = _mm512_mask_srlv_epi32(a, 0, a, count);
52786 assert_eq_m512i(r, a);
52787 let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
52788 let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52789 assert_eq_m512i(r, e);
52790 }
52791
52792 #[simd_test(enable = "avx512f")]
52793 unsafe fn test_mm512_maskz_srlv_epi32() {
52794 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
52795 let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52796 let r = _mm512_maskz_srlv_epi32(0, a, count);
52797 assert_eq_m512i(r, _mm512_setzero_si512());
52798 let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
52799 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
52800 assert_eq_m512i(r, e);
52801 }
52802
52803 #[simd_test(enable = "avx512f,avx512vl")]
52804 unsafe fn test_mm256_mask_srlv_epi32() {
52805 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52806 let count = _mm256_set1_epi32(1);
52807 let r = _mm256_mask_srlv_epi32(a, 0, a, count);
52808 assert_eq_m256i(r, a);
52809 let r = _mm256_mask_srlv_epi32(a, 0b11111111, a, count);
52810 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52811 assert_eq_m256i(r, e);
52812 }
52813
52814 #[simd_test(enable = "avx512f,avx512vl")]
52815 unsafe fn test_mm256_maskz_srlv_epi32() {
52816 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52817 let count = _mm256_set1_epi32(1);
52818 let r = _mm256_maskz_srlv_epi32(0, a, count);
52819 assert_eq_m256i(r, _mm256_setzero_si256());
52820 let r = _mm256_maskz_srlv_epi32(0b11111111, a, count);
52821 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52822 assert_eq_m256i(r, e);
52823 }
52824
52825 #[simd_test(enable = "avx512f,avx512vl")]
52826 unsafe fn test_mm_mask_srlv_epi32() {
52827 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52828 let count = _mm_set1_epi32(1);
52829 let r = _mm_mask_srlv_epi32(a, 0, a, count);
52830 assert_eq_m128i(r, a);
52831 let r = _mm_mask_srlv_epi32(a, 0b00001111, a, count);
52832 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52833 assert_eq_m128i(r, e);
52834 }
52835
52836 #[simd_test(enable = "avx512f,avx512vl")]
52837 unsafe fn test_mm_maskz_srlv_epi32() {
52838 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52839 let count = _mm_set1_epi32(1);
52840 let r = _mm_maskz_srlv_epi32(0, a, count);
52841 assert_eq_m128i(r, _mm_setzero_si128());
52842 let r = _mm_maskz_srlv_epi32(0b00001111, a, count);
52843 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52844 assert_eq_m128i(r, e);
52845 }
52846
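    // Unlike the `*v` variants, sll/srl/sra shift every element by the same
    // amount, taken from the low 64 bits of the 128-bit `count` argument;
    // `_mm_set_epi32(0, 0, 0, 2)` encodes a shift of 2 and the upper elements
    // of `count` are ignored.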
52847 #[simd_test(enable = "avx512f")]
52848 unsafe fn test_mm512_sll_epi32() {
52849 #[rustfmt::skip]
52850 let a = _mm512_set_epi32(
52851 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52852 0, 0, 0, 0,
52853 0, 0, 0, 0,
52854 0, 0, 0, 0,
52855 );
52856 let count = _mm_set_epi32(0, 0, 0, 2);
52857 let r = _mm512_sll_epi32(a, count);
52858 #[rustfmt::skip]
52859 let e = _mm512_set_epi32(
52860 0, 1 << 2, 1 << 3, 1 << 4,
52861 0, 0, 0, 0,
52862 0, 0, 0, 0,
52863 0, 0, 0, 0,
52864 );
52865 assert_eq_m512i(r, e);
52866 }
52867
52868 #[simd_test(enable = "avx512f")]
52869 unsafe fn test_mm512_mask_sll_epi32() {
52870 #[rustfmt::skip]
52871 let a = _mm512_set_epi32(
52872 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52873 0, 0, 0, 0,
52874 0, 0, 0, 0,
52875 0, 0, 0, 0,
52876 );
52877 let count = _mm_set_epi32(0, 0, 0, 2);
52878 let r = _mm512_mask_sll_epi32(a, 0, a, count);
52879 assert_eq_m512i(r, a);
52880 let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
52881 #[rustfmt::skip]
52882 let e = _mm512_set_epi32(
52883 0, 1 << 2, 1 << 3, 1 << 4,
52884 0, 0, 0, 0,
52885 0, 0, 0, 0,
52886 0, 0, 0, 0,
52887 );
52888 assert_eq_m512i(r, e);
52889 }
52890
52891 #[simd_test(enable = "avx512f")]
52892 unsafe fn test_mm512_maskz_sll_epi32() {
52893 #[rustfmt::skip]
52894 let a = _mm512_set_epi32(
52895 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52896 0, 0, 0, 0,
52897 0, 0, 0, 0,
52898 0, 0, 0, 1 << 31,
52899 );
52900 let count = _mm_set_epi32(2, 0, 0, 2);
52901 let r = _mm512_maskz_sll_epi32(0, a, count);
52902 assert_eq_m512i(r, _mm512_setzero_si512());
52903 let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
52904 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52905 assert_eq_m512i(r, e);
52906 }
52907
52908 #[simd_test(enable = "avx512f,avx512vl")]
52909 unsafe fn test_mm256_mask_sll_epi32() {
52910 let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
52911 let count = _mm_set_epi32(0, 0, 0, 1);
52912 let r = _mm256_mask_sll_epi32(a, 0, a, count);
52913 assert_eq_m256i(r, a);
52914 let r = _mm256_mask_sll_epi32(a, 0b11111111, a, count);
52915 let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
52916 assert_eq_m256i(r, e);
52917 }
52918
52919 #[simd_test(enable = "avx512f,avx512vl")]
52920 unsafe fn test_mm256_maskz_sll_epi32() {
52921 let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
52922 let count = _mm_set_epi32(0, 0, 0, 1);
52923 let r = _mm256_maskz_sll_epi32(0, a, count);
52924 assert_eq_m256i(r, _mm256_setzero_si256());
52925 let r = _mm256_maskz_sll_epi32(0b11111111, a, count);
52926 let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
52927 assert_eq_m256i(r, e);
52928 }
52929
52930 #[simd_test(enable = "avx512f,avx512vl")]
52931 unsafe fn test_mm_mask_sll_epi32() {
52932 let a = _mm_set_epi32(1 << 13, 0, 0, 0);
52933 let count = _mm_set_epi32(0, 0, 0, 1);
52934 let r = _mm_mask_sll_epi32(a, 0, a, count);
52935 assert_eq_m128i(r, a);
52936 let r = _mm_mask_sll_epi32(a, 0b00001111, a, count);
52937 let e = _mm_set_epi32(1 << 14, 0, 0, 0);
52938 assert_eq_m128i(r, e);
52939 }
52940
52941 #[simd_test(enable = "avx512f,avx512vl")]
52942 unsafe fn test_mm_maskz_sll_epi32() {
52943 let a = _mm_set_epi32(1 << 13, 0, 0, 0);
52944 let count = _mm_set_epi32(0, 0, 0, 1);
52945 let r = _mm_maskz_sll_epi32(0, a, count);
52946 assert_eq_m128i(r, _mm_setzero_si128());
52947 let r = _mm_maskz_sll_epi32(0b00001111, a, count);
52948 let e = _mm_set_epi32(1 << 14, 0, 0, 0);
52949 assert_eq_m128i(r, e);
52950 }
52951
52952 #[simd_test(enable = "avx512f")]
52953 unsafe fn test_mm512_srl_epi32() {
52954 #[rustfmt::skip]
52955 let a = _mm512_set_epi32(
52956 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52957 0, 0, 0, 0,
52958 0, 0, 0, 0,
52959 0, 0, 0, 0,
52960 );
52961 let count = _mm_set_epi32(0, 0, 0, 2);
52962 let r = _mm512_srl_epi32(a, count);
52963 let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52964 assert_eq_m512i(r, e);
52965 }
52966
52967 #[simd_test(enable = "avx512f")]
52968 unsafe fn test_mm512_mask_srl_epi32() {
52969 #[rustfmt::skip]
52970 let a = _mm512_set_epi32(
52971 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52972 0, 0, 0, 0,
52973 0, 0, 0, 0,
52974 0, 0, 0, 0,
52975 );
52976 let count = _mm_set_epi32(0, 0, 0, 2);
52977 let r = _mm512_mask_srl_epi32(a, 0, a, count);
52978 assert_eq_m512i(r, a);
52979 let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
52980 let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52981 assert_eq_m512i(r, e);
52982 }
52983
52984 #[simd_test(enable = "avx512f")]
52985 unsafe fn test_mm512_maskz_srl_epi32() {
52986 #[rustfmt::skip]
52987 let a = _mm512_set_epi32(
52988 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52989 0, 0, 0, 0,
52990 0, 0, 0, 0,
52991 0, 0, 0, 1 << 31,
52992 );
52993 let count = _mm_set_epi32(2, 0, 0, 2);
52994 let r = _mm512_maskz_srl_epi32(0, a, count);
52995 assert_eq_m512i(r, _mm512_setzero_si512());
52996 let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
52997 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
52998 assert_eq_m512i(r, e);
52999 }
53000
53001 #[simd_test(enable = "avx512f,avx512vl")]
53002 unsafe fn test_mm256_mask_srl_epi32() {
53003 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53004 let count = _mm_set_epi32(0, 0, 0, 1);
53005 let r = _mm256_mask_srl_epi32(a, 0, a, count);
53006 assert_eq_m256i(r, a);
53007 let r = _mm256_mask_srl_epi32(a, 0b11111111, a, count);
53008 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53009 assert_eq_m256i(r, e);
53010 }
53011
53012 #[simd_test(enable = "avx512f,avx512vl")]
53013 unsafe fn test_mm256_maskz_srl_epi32() {
53014 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53015 let count = _mm_set_epi32(0, 0, 0, 1);
53016 let r = _mm256_maskz_srl_epi32(0, a, count);
53017 assert_eq_m256i(r, _mm256_setzero_si256());
53018 let r = _mm256_maskz_srl_epi32(0b11111111, a, count);
53019 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53020 assert_eq_m256i(r, e);
53021 }
53022
53023 #[simd_test(enable = "avx512f,avx512vl")]
53024 unsafe fn test_mm_mask_srl_epi32() {
53025 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53026 let count = _mm_set_epi32(0, 0, 0, 1);
53027 let r = _mm_mask_srl_epi32(a, 0, a, count);
53028 assert_eq_m128i(r, a);
53029 let r = _mm_mask_srl_epi32(a, 0b00001111, a, count);
53030 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53031 assert_eq_m128i(r, e);
53032 }
53033
53034 #[simd_test(enable = "avx512f,avx512vl")]
53035 unsafe fn test_mm_maskz_srl_epi32() {
53036 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53037 let count = _mm_set_epi32(0, 0, 0, 1);
53038 let r = _mm_maskz_srl_epi32(0, a, count);
53039 assert_eq_m128i(r, _mm_setzero_si128());
53040 let r = _mm_maskz_srl_epi32(0b00001111, a, count);
53041 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53042 assert_eq_m128i(r, e);
53043 }
53044
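    // sra is an arithmetic shift: the sign bit is replicated, so negative
    // values round toward negative infinity (e.g. -15 >> 2 == -4).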
53045 #[simd_test(enable = "avx512f")]
53046 unsafe fn test_mm512_sra_epi32() {
53047 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53048 let count = _mm_set_epi32(1, 0, 0, 2);
53049 let r = _mm512_sra_epi32(a, count);
53050 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
53051 assert_eq_m512i(r, e);
53052 }
53053
53054 #[simd_test(enable = "avx512f")]
53055 unsafe fn test_mm512_mask_sra_epi32() {
53056 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
53057 let count = _mm_set_epi32(0, 0, 0, 2);
53058 let r = _mm512_mask_sra_epi32(a, 0, a, count);
53059 assert_eq_m512i(r, a);
53060 let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
53061 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
53062 assert_eq_m512i(r, e);
53063 }
53064
53065 #[simd_test(enable = "avx512f")]
53066 unsafe fn test_mm512_maskz_sra_epi32() {
53067 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
53068 let count = _mm_set_epi32(2, 0, 0, 2);
53069 let r = _mm512_maskz_sra_epi32(0, a, count);
53070 assert_eq_m512i(r, _mm512_setzero_si512());
53071 let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
53072 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
53073 assert_eq_m512i(r, e);
53074 }
53075
53076 #[simd_test(enable = "avx512f,avx512vl")]
53077 unsafe fn test_mm256_mask_sra_epi32() {
53078 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53079 let count = _mm_set_epi32(0, 0, 0, 1);
53080 let r = _mm256_mask_sra_epi32(a, 0, a, count);
53081 assert_eq_m256i(r, a);
53082 let r = _mm256_mask_sra_epi32(a, 0b11111111, a, count);
53083 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53084 assert_eq_m256i(r, e);
53085 }
53086
53087 #[simd_test(enable = "avx512f,avx512vl")]
53088 unsafe fn test_mm256_maskz_sra_epi32() {
53089 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53090 let count = _mm_set_epi32(0, 0, 0, 1);
53091 let r = _mm256_maskz_sra_epi32(0, a, count);
53092 assert_eq_m256i(r, _mm256_setzero_si256());
53093 let r = _mm256_maskz_sra_epi32(0b11111111, a, count);
53094 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53095 assert_eq_m256i(r, e);
53096 }
53097
53098 #[simd_test(enable = "avx512f,avx512vl")]
53099 unsafe fn test_mm_mask_sra_epi32() {
53100 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53101 let count = _mm_set_epi32(0, 0, 0, 1);
53102 let r = _mm_mask_sra_epi32(a, 0, a, count);
53103 assert_eq_m128i(r, a);
53104 let r = _mm_mask_sra_epi32(a, 0b00001111, a, count);
53105 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53106 assert_eq_m128i(r, e);
53107 }
53108
53109 #[simd_test(enable = "avx512f,avx512vl")]
53110 unsafe fn test_mm_maskz_sra_epi32() {
53111 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53112 let count = _mm_set_epi32(0, 0, 0, 1);
53113 let r = _mm_maskz_sra_epi32(0, a, count);
53114 assert_eq_m128i(r, _mm_setzero_si128());
53115 let r = _mm_maskz_sra_epi32(0b00001111, a, count);
53116 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53117 assert_eq_m128i(r, e);
53118 }
53119
53120 #[simd_test(enable = "avx512f")]
53121 unsafe fn test_mm512_srav_epi32() {
53122 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53123 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
53124 let r = _mm512_srav_epi32(a, count);
53125 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53126 assert_eq_m512i(r, e);
53127 }
53128
53129 #[simd_test(enable = "avx512f")]
53130 unsafe fn test_mm512_mask_srav_epi32() {
53131 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
53132 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53133 let r = _mm512_mask_srav_epi32(a, 0, a, count);
53134 assert_eq_m512i(r, a);
53135 let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
53136 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
53137 assert_eq_m512i(r, e);
53138 }
53139
53140 #[simd_test(enable = "avx512f")]
53141 unsafe fn test_mm512_maskz_srav_epi32() {
53142 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
53143 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
53144 let r = _mm512_maskz_srav_epi32(0, a, count);
53145 assert_eq_m512i(r, _mm512_setzero_si512());
53146 let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
53147 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
53148 assert_eq_m512i(r, e);
53149 }
53150
53151 #[simd_test(enable = "avx512f,avx512vl")]
53152 unsafe fn test_mm256_mask_srav_epi32() {
53153 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53154 let count = _mm256_set1_epi32(1);
53155 let r = _mm256_mask_srav_epi32(a, 0, a, count);
53156 assert_eq_m256i(r, a);
53157 let r = _mm256_mask_srav_epi32(a, 0b11111111, a, count);
53158 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53159 assert_eq_m256i(r, e);
53160 }
53161
53162 #[simd_test(enable = "avx512f,avx512vl")]
53163 unsafe fn test_mm256_maskz_srav_epi32() {
53164 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53165 let count = _mm256_set1_epi32(1);
53166 let r = _mm256_maskz_srav_epi32(0, a, count);
53167 assert_eq_m256i(r, _mm256_setzero_si256());
53168 let r = _mm256_maskz_srav_epi32(0b11111111, a, count);
53169 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53170 assert_eq_m256i(r, e);
53171 }
53172
53173 #[simd_test(enable = "avx512f,avx512vl")]
53174 unsafe fn test_mm_mask_srav_epi32() {
53175 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53176 let count = _mm_set1_epi32(1);
53177 let r = _mm_mask_srav_epi32(a, 0, a, count);
53178 assert_eq_m128i(r, a);
53179 let r = _mm_mask_srav_epi32(a, 0b00001111, a, count);
53180 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53181 assert_eq_m128i(r, e);
53182 }
53183
53184 #[simd_test(enable = "avx512f,avx512vl")]
53185 unsafe fn test_mm_maskz_srav_epi32() {
53186 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53187 let count = _mm_set1_epi32(1);
53188 let r = _mm_maskz_srav_epi32(0, a, count);
53189 assert_eq_m128i(r, _mm_setzero_si128());
53190 let r = _mm_maskz_srav_epi32(0b00001111, a, count);
53191 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53192 assert_eq_m128i(r, e);
53193 }
53194
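    // srai takes the shift count as a const generic immediate instead of a
    // vector operand.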
53195 #[simd_test(enable = "avx512f")]
53196 unsafe fn test_mm512_srai_epi32() {
53197 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
53198 let r = _mm512_srai_epi32::<2>(a);
53199 let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
53200 assert_eq_m512i(r, e);
53201 }
53202
53203 #[simd_test(enable = "avx512f")]
53204 unsafe fn test_mm512_mask_srai_epi32() {
53205 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
53206 let r = _mm512_mask_srai_epi32::<2>(a, 0, a);
53207 assert_eq_m512i(r, a);
53208 let r = _mm512_mask_srai_epi32::<2>(a, 0b11111111_11111111, a);
53209 let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
53210 assert_eq_m512i(r, e);
53211 }
53212
53213 #[simd_test(enable = "avx512f")]
53214 unsafe fn test_mm512_maskz_srai_epi32() {
53215 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
53216 let r = _mm512_maskz_srai_epi32::<2>(0, a);
53217 assert_eq_m512i(r, _mm512_setzero_si512());
53218 let r = _mm512_maskz_srai_epi32::<2>(0b00000000_11111111, a);
53219 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
53220 assert_eq_m512i(r, e);
53221 }
53222
53223 #[simd_test(enable = "avx512f,avx512vl")]
53224 unsafe fn test_mm256_mask_srai_epi32() {
53225 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53226 let r = _mm256_mask_srai_epi32::<1>(a, 0, a);
53227 assert_eq_m256i(r, a);
53228 let r = _mm256_mask_srai_epi32::<1>(a, 0b11111111, a);
53229 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53230 assert_eq_m256i(r, e);
53231 }
53232
53233 #[simd_test(enable = "avx512f,avx512vl")]
53234 unsafe fn test_mm256_maskz_srai_epi32() {
53235 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53236 let r = _mm256_maskz_srai_epi32::<1>(0, a);
53237 assert_eq_m256i(r, _mm256_setzero_si256());
53238 let r = _mm256_maskz_srai_epi32::<1>(0b11111111, a);
53239 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53240 assert_eq_m256i(r, e);
53241 }
53242
53243 #[simd_test(enable = "avx512f,avx512vl")]
53244 unsafe fn test_mm_mask_srai_epi32() {
53245 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53246 let r = _mm_mask_srai_epi32::<1>(a, 0, a);
53247 assert_eq_m128i(r, a);
53248 let r = _mm_mask_srai_epi32::<1>(a, 0b00001111, a);
53249 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53250 assert_eq_m128i(r, e);
53251 }
53252
53253 #[simd_test(enable = "avx512f,avx512vl")]
53254 unsafe fn test_mm_maskz_srai_epi32() {
53255 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53256 let r = _mm_maskz_srai_epi32::<1>(0, a);
53257 assert_eq_m128i(r, _mm_setzero_si128());
53258 let r = _mm_maskz_srai_epi32::<1>(0b00001111, a);
53259 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53260 assert_eq_m128i(r, e);
53261 }
53262
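    // permute_ps reorders elements within each 128-bit lane, applying the same
    // 2-bit index per result position in every lane; 0b11_11_11_11 broadcasts
    // lane element 3 across each lane.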
53263 #[simd_test(enable = "avx512f")]
53264 unsafe fn test_mm512_permute_ps() {
53265 let a = _mm512_setr_ps(
53266 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53267 );
53268 let r = _mm512_permute_ps::<0b11_11_11_11>(a);
53269 let e = _mm512_setr_ps(
53270 3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53271 );
53272 assert_eq_m512(r, e);
53273 }
53274
53275 #[simd_test(enable = "avx512f")]
53276 unsafe fn test_mm512_mask_permute_ps() {
53277 let a = _mm512_setr_ps(
53278 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53279 );
53280 let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53281 assert_eq_m512(r, a);
53282 let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111_11111111, a);
53283 let e = _mm512_setr_ps(
53284 3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53285 );
53286 assert_eq_m512(r, e);
53287 }
53288
53289 #[simd_test(enable = "avx512f")]
53290 unsafe fn test_mm512_maskz_permute_ps() {
53291 let a = _mm512_setr_ps(
53292 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53293 );
53294 let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0, a);
53295 assert_eq_m512(r, _mm512_setzero_ps());
53296 let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0b11111111_11111111, a);
53297 let e = _mm512_setr_ps(
53298 3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53299 );
53300 assert_eq_m512(r, e);
53301 }
53302
53303 #[simd_test(enable = "avx512f,avx512vl")]
53304 unsafe fn test_mm256_mask_permute_ps() {
53305 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53306 let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53307 assert_eq_m256(r, a);
53308 let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111, a);
53309 let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
53310 assert_eq_m256(r, e);
53311 }
53312
53313 #[simd_test(enable = "avx512f,avx512vl")]
53314 unsafe fn test_mm256_maskz_permute_ps() {
53315 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53316 let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0, a);
53317 assert_eq_m256(r, _mm256_setzero_ps());
53318 let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0b11111111, a);
53319 let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
53320 assert_eq_m256(r, e);
53321 }
53322
53323 #[simd_test(enable = "avx512f,avx512vl")]
53324 unsafe fn test_mm_mask_permute_ps() {
53325 let a = _mm_set_ps(0., 1., 2., 3.);
53326 let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53327 assert_eq_m128(r, a);
53328 let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0b00001111, a);
53329 let e = _mm_set_ps(0., 0., 0., 0.);
53330 assert_eq_m128(r, e);
53331 }
53332
53333 #[simd_test(enable = "avx512f,avx512vl")]
53334 unsafe fn test_mm_maskz_permute_ps() {
53335 let a = _mm_set_ps(0., 1., 2., 3.);
53336 let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0, a);
53337 assert_eq_m128(r, _mm_setzero_ps());
53338 let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0b00001111, a);
53339 let e = _mm_set_ps(0., 0., 0., 0.);
53340 assert_eq_m128(r, e);
53341 }
53342
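    // Despite its name, `_mm512_permutevar_epi32` permutes across the whole
    // vector: index 1 selects element 1 of `a`, which is 14 here because
    // `_mm512_set_epi32` lists element 15 first.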
53343 #[simd_test(enable = "avx512f")]
53344 unsafe fn test_mm512_permutevar_epi32() {
53345 let idx = _mm512_set1_epi32(1);
53346 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53347 let r = _mm512_permutevar_epi32(idx, a);
53348 let e = _mm512_set1_epi32(14);
53349 assert_eq_m512i(r, e);
53350 }
53351
53352 #[simd_test(enable = "avx512f")]
53353 unsafe fn test_mm512_mask_permutevar_epi32() {
53354 let idx = _mm512_set1_epi32(1);
53355 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53356 let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
53357 assert_eq_m512i(r, a);
53358 let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a);
53359 let e = _mm512_set1_epi32(14);
53360 assert_eq_m512i(r, e);
53361 }
53362
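    // permutevar_ps, by contrast, uses only the low 2 bits of each index and
    // selects within each 128-bit lane.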
53363 #[simd_test(enable = "avx512f")]
53364 unsafe fn test_mm512_permutevar_ps() {
53365 let a = _mm512_set_ps(
53366 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53367 );
53368 let b = _mm512_set1_epi32(0b01);
53369 let r = _mm512_permutevar_ps(a, b);
53370 let e = _mm512_set_ps(
53371 2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
53372 );
53373 assert_eq_m512(r, e);
53374 }
53375
53376 #[simd_test(enable = "avx512f")]
53377 unsafe fn test_mm512_mask_permutevar_ps() {
53378 let a = _mm512_set_ps(
53379 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53380 );
53381 let b = _mm512_set1_epi32(0b01);
53382 let r = _mm512_mask_permutevar_ps(a, 0, a, b);
53383 assert_eq_m512(r, a);
53384 let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b);
53385 let e = _mm512_set_ps(
53386 2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
53387 );
53388 assert_eq_m512(r, e);
53389 }
53390
53391 #[simd_test(enable = "avx512f")]
53392 unsafe fn test_mm512_maskz_permutevar_ps() {
53393 let a = _mm512_set_ps(
53394 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53395 );
53396 let b = _mm512_set1_epi32(0b01);
53397 let r = _mm512_maskz_permutevar_ps(0, a, b);
53398 assert_eq_m512(r, _mm512_setzero_ps());
53399 let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b);
53400 let e = _mm512_set_ps(
53401 0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
53402 );
53403 assert_eq_m512(r, e);
53404 }
53405
53406 #[simd_test(enable = "avx512f,avx512vl")]
53407 unsafe fn test_mm256_mask_permutevar_ps() {
53408 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53409 let b = _mm256_set1_epi32(0b01);
53410 let r = _mm256_mask_permutevar_ps(a, 0, a, b);
53411 assert_eq_m256(r, a);
53412 let r = _mm256_mask_permutevar_ps(a, 0b11111111, a, b);
53413 let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
53414 assert_eq_m256(r, e);
53415 }
53416
53417 #[simd_test(enable = "avx512f,avx512vl")]
53418 unsafe fn test_mm256_maskz_permutevar_ps() {
53419 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53420 let b = _mm256_set1_epi32(0b01);
53421 let r = _mm256_maskz_permutevar_ps(0, a, b);
53422 assert_eq_m256(r, _mm256_setzero_ps());
53423 let r = _mm256_maskz_permutevar_ps(0b11111111, a, b);
53424 let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
53425 assert_eq_m256(r, e);
53426 }
53427
53428 #[simd_test(enable = "avx512f,avx512vl")]
53429 unsafe fn test_mm_mask_permutevar_ps() {
53430 let a = _mm_set_ps(0., 1., 2., 3.);
53431 let b = _mm_set1_epi32(0b01);
53432 let r = _mm_mask_permutevar_ps(a, 0, a, b);
53433 assert_eq_m128(r, a);
53434 let r = _mm_mask_permutevar_ps(a, 0b00001111, a, b);
53435 let e = _mm_set_ps(2., 2., 2., 2.);
53436 assert_eq_m128(r, e);
53437 }
53438
53439 #[simd_test(enable = "avx512f,avx512vl")]
53440 unsafe fn test_mm_maskz_permutevar_ps() {
53441 let a = _mm_set_ps(0., 1., 2., 3.);
53442 let b = _mm_set1_epi32(0b01);
53443 let r = _mm_maskz_permutevar_ps(0, a, b);
53444 assert_eq_m128(r, _mm_setzero_ps());
53445 let r = _mm_maskz_permutevar_ps(0b00001111, a, b);
53446 let e = _mm_set_ps(2., 2., 2., 2.);
53447 assert_eq_m128(r, e);
53448 }
53449
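    // permutexvar is the full-width, cross-lane permute: an all-ones index
    // vector broadcasts element 1 of `a` to every position.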
53450 #[simd_test(enable = "avx512f")]
53451 unsafe fn test_mm512_permutexvar_epi32() {
53452 let idx = _mm512_set1_epi32(1);
53453 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53454 let r = _mm512_permutexvar_epi32(idx, a);
53455 let e = _mm512_set1_epi32(14);
53456 assert_eq_m512i(r, e);
53457 }
53458
53459 #[simd_test(enable = "avx512f")]
53460 unsafe fn test_mm512_mask_permutexvar_epi32() {
53461 let idx = _mm512_set1_epi32(1);
53462 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53463 let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
53464 assert_eq_m512i(r, a);
53465 let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a);
53466 let e = _mm512_set1_epi32(14);
53467 assert_eq_m512i(r, e);
53468 }
53469
53470 #[simd_test(enable = "avx512f")]
53471 unsafe fn test_mm512_maskz_permutexvar_epi32() {
53472 let idx = _mm512_set1_epi32(1);
53473 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53474 let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
53475 assert_eq_m512i(r, _mm512_setzero_si512());
53476 let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
53477 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14);
53478 assert_eq_m512i(r, e);
53479 }
53480
53481 #[simd_test(enable = "avx512f,avx512vl")]
53482 unsafe fn test_mm256_permutexvar_epi32() {
53483 let idx = _mm256_set1_epi32(1);
53484 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53485 let r = _mm256_permutexvar_epi32(idx, a);
53486 let e = _mm256_set1_epi32(6);
53487 assert_eq_m256i(r, e);
53488 }
53489
53490 #[simd_test(enable = "avx512f,avx512vl")]
53491 unsafe fn test_mm256_mask_permutexvar_epi32() {
53492 let idx = _mm256_set1_epi32(1);
53493 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53494 let r = _mm256_mask_permutexvar_epi32(a, 0, idx, a);
53495 assert_eq_m256i(r, a);
53496 let r = _mm256_mask_permutexvar_epi32(a, 0b11111111, idx, a);
53497 let e = _mm256_set1_epi32(6);
53498 assert_eq_m256i(r, e);
53499 }
53500
53501 #[simd_test(enable = "avx512f,avx512vl")]
53502 unsafe fn test_mm256_maskz_permutexvar_epi32() {
53503 let idx = _mm256_set1_epi32(1);
53504 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53505 let r = _mm256_maskz_permutexvar_epi32(0, idx, a);
53506 assert_eq_m256i(r, _mm256_setzero_si256());
53507 let r = _mm256_maskz_permutexvar_epi32(0b11111111, idx, a);
53508 let e = _mm256_set1_epi32(6);
53509 assert_eq_m256i(r, e);
53510 }
53511
53512 #[simd_test(enable = "avx512f")]
53513 unsafe fn test_mm512_permutexvar_ps() {
53514 let idx = _mm512_set1_epi32(1);
53515 let a = _mm512_set_ps(
53516 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53517 );
53518 let r = _mm512_permutexvar_ps(idx, a);
53519 let e = _mm512_set1_ps(14.);
53520 assert_eq_m512(r, e);
53521 }
53522
53523 #[simd_test(enable = "avx512f")]
53524 unsafe fn test_mm512_mask_permutexvar_ps() {
53525 let idx = _mm512_set1_epi32(1);
53526 let a = _mm512_set_ps(
53527 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53528 );
53529 let r = _mm512_mask_permutexvar_ps(a, 0, idx, a);
53530 assert_eq_m512(r, a);
53531 let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a);
53532 let e = _mm512_set1_ps(14.);
53533 assert_eq_m512(r, e);
53534 }
53535
53536 #[simd_test(enable = "avx512f")]
53537 unsafe fn test_mm512_maskz_permutexvar_ps() {
53538 let idx = _mm512_set1_epi32(1);
53539 let a = _mm512_set_ps(
53540 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53541 );
53542 let r = _mm512_maskz_permutexvar_ps(0, idx, a);
53543 assert_eq_m512(r, _mm512_setzero_ps());
53544 let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a);
53545 let e = _mm512_set_ps(
53546 0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14.,
53547 );
53548 assert_eq_m512(r, e);
53549 }
53550
53551 #[simd_test(enable = "avx512f,avx512vl")]
53552 unsafe fn test_mm256_permutexvar_ps() {
53553 let idx = _mm256_set1_epi32(1);
53554 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53555 let r = _mm256_permutexvar_ps(idx, a);
53556 let e = _mm256_set1_ps(6.);
53557 assert_eq_m256(r, e);
53558 }
53559
53560 #[simd_test(enable = "avx512f,avx512vl")]
53561 unsafe fn test_mm256_mask_permutexvar_ps() {
53562 let idx = _mm256_set1_epi32(1);
53563 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53564 let r = _mm256_mask_permutexvar_ps(a, 0, idx, a);
53565 assert_eq_m256(r, a);
53566 let r = _mm256_mask_permutexvar_ps(a, 0b11111111, idx, a);
53567 let e = _mm256_set1_ps(6.);
53568 assert_eq_m256(r, e);
53569 }
53570
53571 #[simd_test(enable = "avx512f,avx512vl")]
53572 unsafe fn test_mm256_maskz_permutexvar_ps() {
53573 let idx = _mm256_set1_epi32(1);
53574 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53575 let r = _mm256_maskz_permutexvar_ps(0, idx, a);
53576 assert_eq_m256(r, _mm256_setzero_ps());
53577 let r = _mm256_maskz_permutexvar_ps(0b11111111, idx, a);
53578 let e = _mm256_set1_ps(6.);
53579 assert_eq_m256(r, e);
53580 }
53581
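    // permutex2var indexes the concatenation of `a` and `b`: with 16 elements
    // per vector, bit 4 of an index (`1 << 4`) selects from `b`; the 256-bit
    // and 128-bit tests below use `1 << 3` and `1 << 2` respectively.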
53582 #[simd_test(enable = "avx512f")]
53583 unsafe fn test_mm512_permutex2var_epi32() {
53584 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53585 #[rustfmt::skip]
53586 let idx = _mm512_set_epi32(
53587 1, 1 << 4, 2, 1 << 4,
53588 3, 1 << 4, 4, 1 << 4,
53589 5, 1 << 4, 6, 1 << 4,
53590 7, 1 << 4, 8, 1 << 4,
53591 );
53592 let b = _mm512_set1_epi32(100);
53593 let r = _mm512_permutex2var_epi32(a, idx, b);
53594 let e = _mm512_set_epi32(
53595 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
53596 );
53597 assert_eq_m512i(r, e);
53598 }
53599
53600 #[simd_test(enable = "avx512f")]
53601 unsafe fn test_mm512_mask_permutex2var_epi32() {
53602 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53603 #[rustfmt::skip]
53604 let idx = _mm512_set_epi32(
53605 1, 1 << 4, 2, 1 << 4,
53606 3, 1 << 4, 4, 1 << 4,
53607 5, 1 << 4, 6, 1 << 4,
53608 7, 1 << 4, 8, 1 << 4,
53609 );
53610 let b = _mm512_set1_epi32(100);
53611 let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b);
53612 assert_eq_m512i(r, a);
53613 let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b);
53614 let e = _mm512_set_epi32(
53615 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
53616 );
53617 assert_eq_m512i(r, e);
53618 }
53619
53620 #[simd_test(enable = "avx512f")]
53621 unsafe fn test_mm512_maskz_permutex2var_epi32() {
53622 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53623 #[rustfmt::skip]
53624 let idx = _mm512_set_epi32(
53625 1, 1 << 4, 2, 1 << 4,
53626 3, 1 << 4, 4, 1 << 4,
53627 5, 1 << 4, 6, 1 << 4,
53628 7, 1 << 4, 8, 1 << 4,
53629 );
53630 let b = _mm512_set1_epi32(100);
53631 let r = _mm512_maskz_permutex2var_epi32(0, a, idx, b);
53632 assert_eq_m512i(r, _mm512_setzero_si512());
53633 let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b);
53634 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100);
53635 assert_eq_m512i(r, e);
53636 }
53637
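    // The mask2 variant copies elements from `idx` (not `a`) where the mask
    // bit is clear, which is why an all-zero mask returns `idx` unchanged.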
53638 #[simd_test(enable = "avx512f")]
53639 unsafe fn test_mm512_mask2_permutex2var_epi32() {
53640 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53641 #[rustfmt::skip]
53642 let idx = _mm512_set_epi32(
53643 1000, 1 << 4, 2000, 1 << 4,
53644 3000, 1 << 4, 4000, 1 << 4,
53645 5, 1 << 4, 6, 1 << 4,
53646 7, 1 << 4, 8, 1 << 4,
53647 );
53648 let b = _mm512_set1_epi32(100);
53649 let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
53650 assert_eq_m512i(r, idx);
53651 let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b);
53652 #[rustfmt::skip]
53653 let e = _mm512_set_epi32(
53654 1000, 1 << 4, 2000, 1 << 4,
53655 3000, 1 << 4, 4000, 1 << 4,
53656 10, 100, 9, 100,
53657 8, 100, 7, 100,
53658 );
53659 assert_eq_m512i(r, e);
53660 }
53661
53662 #[simd_test(enable = "avx512f,avx512vl")]
53663 unsafe fn test_mm256_permutex2var_epi32() {
53664 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53665 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53666 let b = _mm256_set1_epi32(100);
53667 let r = _mm256_permutex2var_epi32(a, idx, b);
53668 let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53669 assert_eq_m256i(r, e);
53670 }
53671
53672 #[simd_test(enable = "avx512f,avx512vl")]
53673 unsafe fn test_mm256_mask_permutex2var_epi32() {
53674 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53675 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53676 let b = _mm256_set1_epi32(100);
53677 let r = _mm256_mask_permutex2var_epi32(a, 0, idx, b);
53678 assert_eq_m256i(r, a);
53679 let r = _mm256_mask_permutex2var_epi32(a, 0b11111111, idx, b);
53680 let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53681 assert_eq_m256i(r, e);
53682 }
53683
53684 #[simd_test(enable = "avx512f,avx512vl")]
53685 unsafe fn test_mm256_maskz_permutex2var_epi32() {
53686 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53687 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53688 let b = _mm256_set1_epi32(100);
53689 let r = _mm256_maskz_permutex2var_epi32(0, a, idx, b);
53690 assert_eq_m256i(r, _mm256_setzero_si256());
53691 let r = _mm256_maskz_permutex2var_epi32(0b11111111, a, idx, b);
53692 let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53693 assert_eq_m256i(r, e);
53694 }
53695
53696 #[simd_test(enable = "avx512f,avx512vl")]
53697 unsafe fn test_mm256_mask2_permutex2var_epi32() {
53698 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53699 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53700 let b = _mm256_set1_epi32(100);
53701 let r = _mm256_mask2_permutex2var_epi32(a, idx, 0, b);
53702 assert_eq_m256i(r, idx);
53703 let r = _mm256_mask2_permutex2var_epi32(a, idx, 0b11111111, b);
53704 let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53705 assert_eq_m256i(r, e);
53706 }
53707
53708 #[simd_test(enable = "avx512f,avx512vl")]
53709 unsafe fn test_mm_permutex2var_epi32() {
53710 let a = _mm_set_epi32(0, 1, 2, 3);
53711 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53712 let b = _mm_set1_epi32(100);
53713 let r = _mm_permutex2var_epi32(a, idx, b);
53714 let e = _mm_set_epi32(2, 100, 1, 100);
53715 assert_eq_m128i(r, e);
53716 }
53717
53718 #[simd_test(enable = "avx512f,avx512vl")]
53719 unsafe fn test_mm_mask_permutex2var_epi32() {
53720 let a = _mm_set_epi32(0, 1, 2, 3);
53721 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53722 let b = _mm_set1_epi32(100);
53723 let r = _mm_mask_permutex2var_epi32(a, 0, idx, b);
53724 assert_eq_m128i(r, a);
53725 let r = _mm_mask_permutex2var_epi32(a, 0b00001111, idx, b);
53726 let e = _mm_set_epi32(2, 100, 1, 100);
53727 assert_eq_m128i(r, e);
53728 }
53729
53730 #[simd_test(enable = "avx512f,avx512vl")]
53731 unsafe fn test_mm_maskz_permutex2var_epi32() {
53732 let a = _mm_set_epi32(0, 1, 2, 3);
53733 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53734 let b = _mm_set1_epi32(100);
53735 let r = _mm_maskz_permutex2var_epi32(0, a, idx, b);
53736 assert_eq_m128i(r, _mm_setzero_si128());
53737 let r = _mm_maskz_permutex2var_epi32(0b00001111, a, idx, b);
53738 let e = _mm_set_epi32(2, 100, 1, 100);
53739 assert_eq_m128i(r, e);
53740 }
53741
53742 #[simd_test(enable = "avx512f,avx512vl")]
53743 unsafe fn test_mm_mask2_permutex2var_epi32() {
53744 let a = _mm_set_epi32(0, 1, 2, 3);
53745 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53746 let b = _mm_set1_epi32(100);
53747 let r = _mm_mask2_permutex2var_epi32(a, idx, 0, b);
53748 assert_eq_m128i(r, idx);
53749 let r = _mm_mask2_permutex2var_epi32(a, idx, 0b00001111, b);
53750 let e = _mm_set_epi32(2, 100, 1, 100);
53751 assert_eq_m128i(r, e);
53752 }
53753
53754 #[simd_test(enable = "avx512f")]
53755 unsafe fn test_mm512_permutex2var_ps() {
53756 let a = _mm512_set_ps(
53757 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53758 );
53759 #[rustfmt::skip]
53760 let idx = _mm512_set_epi32(
53761 1, 1 << 4, 2, 1 << 4,
53762 3, 1 << 4, 4, 1 << 4,
53763 5, 1 << 4, 6, 1 << 4,
53764 7, 1 << 4, 8, 1 << 4,
53765 );
53766 let b = _mm512_set1_ps(100.);
53767 let r = _mm512_permutex2var_ps(a, idx, b);
53768 let e = _mm512_set_ps(
53769 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53770 );
53771 assert_eq_m512(r, e);
53772 }
53773
53774 #[simd_test(enable = "avx512f")]
53775 unsafe fn test_mm512_mask_permutex2var_ps() {
53776 let a = _mm512_set_ps(
53777 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53778 );
53779 #[rustfmt::skip]
53780 let idx = _mm512_set_epi32(
53781 1, 1 << 4, 2, 1 << 4,
53782 3, 1 << 4, 4, 1 << 4,
53783 5, 1 << 4, 6, 1 << 4,
53784 7, 1 << 4, 8, 1 << 4,
53785 );
53786 let b = _mm512_set1_ps(100.);
53787 let r = _mm512_mask_permutex2var_ps(a, 0, idx, b);
53788 assert_eq_m512(r, a);
53789 let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b);
53790 let e = _mm512_set_ps(
53791 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53792 );
53793 assert_eq_m512(r, e);
53794 }
53795
53796 #[simd_test(enable = "avx512f")]
53797 unsafe fn test_mm512_maskz_permutex2var_ps() {
53798 let a = _mm512_set_ps(
53799 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53800 );
53801 #[rustfmt::skip]
53802 let idx = _mm512_set_epi32(
53803 1, 1 << 4, 2, 1 << 4,
53804 3, 1 << 4, 4, 1 << 4,
53805 5, 1 << 4, 6, 1 << 4,
53806 7, 1 << 4, 8, 1 << 4,
53807 );
53808 let b = _mm512_set1_ps(100.);
53809 let r = _mm512_maskz_permutex2var_ps(0, a, idx, b);
53810 assert_eq_m512(r, _mm512_setzero_ps());
53811 let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b);
53812 let e = _mm512_set_ps(
53813 0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
53814 );
53815 assert_eq_m512(r, e);
53816 }
53817
53818 #[simd_test(enable = "avx512f")]
53819 unsafe fn test_mm512_mask2_permutex2var_ps() {
53820 let a = _mm512_set_ps(
53821 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53822 );
53823 #[rustfmt::skip]
53824 let idx = _mm512_set_epi32(
53825 1, 1 << 4, 2, 1 << 4,
53826 3, 1 << 4, 4, 1 << 4,
53827 5, 1 << 4, 6, 1 << 4,
53828 7, 1 << 4, 8, 1 << 4,
53829 );
53830 let b = _mm512_set1_ps(100.);
53831 let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
53832 assert_eq_m512(r, _mm512_castsi512_ps(idx));
53833 let r = _mm512_mask2_permutex2var_ps(a, idx, 0b11111111_11111111, b);
53834 let e = _mm512_set_ps(
53835 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53836 );
53837 assert_eq_m512(r, e);
53838 }
53839
53840 #[simd_test(enable = "avx512f,avx512vl")]
53841 unsafe fn test_mm256_permutex2var_ps() {
53842 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53843 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53844 let b = _mm256_set1_ps(100.);
53845 let r = _mm256_permutex2var_ps(a, idx, b);
53846 let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53847 assert_eq_m256(r, e);
53848 }
53849
53850 #[simd_test(enable = "avx512f,avx512vl")]
53851 unsafe fn test_mm256_mask_permutex2var_ps() {
53852 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53853 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53854 let b = _mm256_set1_ps(100.);
53855 let r = _mm256_mask_permutex2var_ps(a, 0, idx, b);
53856 assert_eq_m256(r, a);
53857 let r = _mm256_mask_permutex2var_ps(a, 0b11111111, idx, b);
53858 let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53859 assert_eq_m256(r, e);
53860 }
53861
53862 #[simd_test(enable = "avx512f,avx512vl")]
53863 unsafe fn test_mm256_maskz_permutex2var_ps() {
53864 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53865 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53866 let b = _mm256_set1_ps(100.);
53867 let r = _mm256_maskz_permutex2var_ps(0, a, idx, b);
53868 assert_eq_m256(r, _mm256_setzero_ps());
53869 let r = _mm256_maskz_permutex2var_ps(0b11111111, a, idx, b);
53870 let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53871 assert_eq_m256(r, e);
53872 }
53873
53874 #[simd_test(enable = "avx512f,avx512vl")]
53875 unsafe fn test_mm256_mask2_permutex2var_ps() {
53876 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53877 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53878 let b = _mm256_set1_ps(100.);
53879 let r = _mm256_mask2_permutex2var_ps(a, idx, 0, b);
53880 assert_eq_m256(r, _mm256_castsi256_ps(idx));
53881 let r = _mm256_mask2_permutex2var_ps(a, idx, 0b11111111, b);
53882 let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53883 assert_eq_m256(r, e);
53884 }
53885
53886 #[simd_test(enable = "avx512f,avx512vl")]
53887 unsafe fn test_mm_permutex2var_ps() {
53888 let a = _mm_set_ps(0., 1., 2., 3.);
53889 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53890 let b = _mm_set1_ps(100.);
53891 let r = _mm_permutex2var_ps(a, idx, b);
53892 let e = _mm_set_ps(2., 100., 1., 100.);
53893 assert_eq_m128(r, e);
53894 }
53895
53896 #[simd_test(enable = "avx512f,avx512vl")]
53897 unsafe fn test_mm_mask_permutex2var_ps() {
53898 let a = _mm_set_ps(0., 1., 2., 3.);
53899 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53900 let b = _mm_set1_ps(100.);
53901 let r = _mm_mask_permutex2var_ps(a, 0, idx, b);
53902 assert_eq_m128(r, a);
53903 let r = _mm_mask_permutex2var_ps(a, 0b00001111, idx, b);
53904 let e = _mm_set_ps(2., 100., 1., 100.);
53905 assert_eq_m128(r, e);
53906 }
53907
53908 #[simd_test(enable = "avx512f,avx512vl")]
53909 unsafe fn test_mm_maskz_permutex2var_ps() {
53910 let a = _mm_set_ps(0., 1., 2., 3.);
53911 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53912 let b = _mm_set1_ps(100.);
53913 let r = _mm_maskz_permutex2var_ps(0, a, idx, b);
53914 assert_eq_m128(r, _mm_setzero_ps());
53915 let r = _mm_maskz_permutex2var_ps(0b00001111, a, idx, b);
53916 let e = _mm_set_ps(2., 100., 1., 100.);
53917 assert_eq_m128(r, e);
53918 }
53919
53920 #[simd_test(enable = "avx512f,avx512vl")]
53921 unsafe fn test_mm_mask2_permutex2var_ps() {
53922 let a = _mm_set_ps(0., 1., 2., 3.);
53923 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53924 let b = _mm_set1_ps(100.);
53925 let r = _mm_mask2_permutex2var_ps(a, idx, 0, b);
53926 assert_eq_m128(r, _mm_castsi128_ps(idx));
53927 let r = _mm_mask2_permutex2var_ps(a, idx, 0b00001111, b);
53928 let e = _mm_set_ps(2., 100., 1., 100.);
53929 assert_eq_m128(r, e);
53930 }
53931
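    // shuffle_epi32 applies the `_MM_PERM_*` control to each 128-bit lane
    // independently; `_MM_PERM_AADD` writes lane element 3 to the two low
    // positions and lane element 0 to the two high positions of each lane.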
53932 #[simd_test(enable = "avx512f")]
53933 unsafe fn test_mm512_shuffle_epi32() {
53934 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53935 let r = _mm512_shuffle_epi32::<_MM_PERM_AADD>(a);
53936 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
53937 assert_eq_m512i(r, e);
53938 }
53939
53940 #[simd_test(enable = "avx512f")]
53941 unsafe fn test_mm512_mask_shuffle_epi32() {
53942 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53943 let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53944 assert_eq_m512i(r, a);
53945 let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111_11111111, a);
53946 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
53947 assert_eq_m512i(r, e);
53948 }
53949
53950 #[simd_test(enable = "avx512f")]
53951 unsafe fn test_mm512_maskz_shuffle_epi32() {
53952 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53953 let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53954 assert_eq_m512i(r, _mm512_setzero_si512());
53955 let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00000000_11111111, a);
53956 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0);
53957 assert_eq_m512i(r, e);
53958 }
53959
53960 #[simd_test(enable = "avx512f,avx512vl")]
53961 unsafe fn test_mm256_mask_shuffle_epi32() {
53962 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
53963 let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53964 assert_eq_m256i(r, a);
53965 let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111, a);
53966 let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
53967 assert_eq_m256i(r, e);
53968 }
53969
53970 #[simd_test(enable = "avx512f,avx512vl")]
53971 unsafe fn test_mm256_maskz_shuffle_epi32() {
53972 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
53973 let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53974 assert_eq_m256i(r, _mm256_setzero_si256());
53975 let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b11111111, a);
53976 let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
53977 assert_eq_m256i(r, e);
53978 }
53979
53980 #[simd_test(enable = "avx512f,avx512vl")]
53981 unsafe fn test_mm_mask_shuffle_epi32() {
53982 let a = _mm_set_epi32(1, 4, 5, 8);
53983 let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53984 assert_eq_m128i(r, a);
53985 let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b00001111, a);
53986 let e = _mm_set_epi32(8, 8, 1, 1);
53987 assert_eq_m128i(r, e);
53988 }
53989
53990 #[simd_test(enable = "avx512f,avx512vl")]
53991 unsafe fn test_mm_maskz_shuffle_epi32() {
53992 let a = _mm_set_epi32(1, 4, 5, 8);
53993 let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53994 assert_eq_m128i(r, _mm_setzero_si128());
53995 let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00001111, a);
53996 let e = _mm_set_epi32(8, 8, 1, 1);
53997 assert_eq_m128i(r, e);
53998 }
53999
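    // In shuffle_ps the two low 2-bit fields of the control select from `a`
    // and the two high fields select from `b`, per 128-bit lane, so
    // 0b00_00_11_11 produces (a3, a3, b0, b0) in each lane.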
54000 #[simd_test(enable = "avx512f")]
54001 unsafe fn test_mm512_shuffle_ps() {
54002 let a = _mm512_setr_ps(
54003 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54004 );
54005 let b = _mm512_setr_ps(
54006 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54007 );
54008 let r = _mm512_shuffle_ps::<0b00_00_11_11>(a, b);
54009 let e = _mm512_setr_ps(
54010 8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
54011 );
54012 assert_eq_m512(r, e);
54013 }
54014
54015 #[simd_test(enable = "avx512f")]
54016 unsafe fn test_mm512_mask_shuffle_ps() {
54017 let a = _mm512_setr_ps(
54018 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54019 );
54020 let b = _mm512_setr_ps(
54021 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54022 );
54023 let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0, a, b);
54024 assert_eq_m512(r, a);
54025 let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111_11111111, a, b);
54026 let e = _mm512_setr_ps(
54027 8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
54028 );
54029 assert_eq_m512(r, e);
54030 }
54031
54032 #[simd_test(enable = "avx512f")]
54033 unsafe fn test_mm512_maskz_shuffle_ps() {
54034 let a = _mm512_setr_ps(
54035 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54036 );
54037 let b = _mm512_setr_ps(
54038 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54039 );
54040 let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0, a, b);
54041 assert_eq_m512(r, _mm512_setzero_ps());
54042 let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0b00000000_11111111, a, b);
54043 let e = _mm512_setr_ps(
54044 8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0.,
54045 );
54046 assert_eq_m512(r, e);
54047 }
54048
54049 #[simd_test(enable = "avx512f,avx512vl")]
54050 unsafe fn test_mm256_mask_shuffle_ps() {
54051 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54052 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54053 let r = _mm256_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
54054 assert_eq_m256(r, a);
54055 let r = _mm256_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111, a, b);
54056 let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
54057 assert_eq_m256(r, e);
54058 }
54059
54060 #[simd_test(enable = "avx512f,avx512vl")]
54061 unsafe fn test_mm256_maskz_shuffle_ps() {
54062 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54063 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54064 let r = _mm256_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
54065 assert_eq_m256(r, _mm256_setzero_ps());
54066 let r = _mm256_maskz_shuffle_ps::<0b00_00_11_11>(0b11111111, a, b);
54067 let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
54068 assert_eq_m256(r, e);
54069 }
54070
54071 #[simd_test(enable = "avx512f,avx512vl")]
54072 unsafe fn test_mm_mask_shuffle_ps() {
54073 let a = _mm_set_ps(1., 4., 5., 8.);
54074 let b = _mm_set_ps(2., 3., 6., 7.);
54075 let r = _mm_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
54076 assert_eq_m128(r, a);
54077 let r = _mm_mask_shuffle_ps::<0b00_00_11_11>(a, 0b00001111, a, b);
54078 let e = _mm_set_ps(7., 7., 1., 1.);
54079 assert_eq_m128(r, e);
54080 }
54081
54082 #[simd_test(enable = "avx512f,avx512vl")]
54083 unsafe fn test_mm_maskz_shuffle_ps() {
54084 let a = _mm_set_ps(1., 4., 5., 8.);
54085 let b = _mm_set_ps(2., 3., 6., 7.);
54086 let r = _mm_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
54087 assert_eq_m128(r, _mm_setzero_ps());
54088 let r = _mm_maskz_shuffle_ps::<0b00_00_11_11>(0b00001111, a, b);
54089 let e = _mm_set_ps(7., 7., 1., 1.);
54090 assert_eq_m128(r, e);
54091 }
54092
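    // shuffle_i32x4/f32x4 move whole 128-bit lanes: for the 512-bit form the
    // two low result lanes are picked from `a` and the two high lanes from
    // `b`, while the 256-bit form picks one lane from `a` and one from `b`.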
54093 #[simd_test(enable = "avx512f")]
54094 unsafe fn test_mm512_shuffle_i32x4() {
54095 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54096 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54097 let r = _mm512_shuffle_i32x4::<0b00_00_00_00>(a, b);
54098 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
54099 assert_eq_m512i(r, e);
54100 }
54101
54102 #[simd_test(enable = "avx512f")]
54103 unsafe fn test_mm512_mask_shuffle_i32x4() {
54104 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54105 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54106 let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0, a, b);
54107 assert_eq_m512i(r, a);
54108 let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
54109 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
54110 assert_eq_m512i(r, e);
54111 }
54112
54113 #[simd_test(enable = "avx512f")]
54114 unsafe fn test_mm512_maskz_shuffle_i32x4() {
54115 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54116 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54117 let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0, a, b);
54118 assert_eq_m512i(r, _mm512_setzero_si512());
54119 let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
54120 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0);
54121 assert_eq_m512i(r, e);
54122 }
54123
54124 #[simd_test(enable = "avx512f,avx512vl")]
54125 unsafe fn test_mm256_shuffle_i32x4() {
54126 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54127 let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54128 let r = _mm256_shuffle_i32x4::<0b00>(a, b);
54129 let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54130 assert_eq_m256i(r, e);
54131 }
54132
54133 #[simd_test(enable = "avx512f,avx512vl")]
54134 unsafe fn test_mm256_mask_shuffle_i32x4() {
54135 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54136 let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54137 let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0, a, b);
54138 assert_eq_m256i(r, a);
54139 let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0b11111111, a, b);
54140 let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54141 assert_eq_m256i(r, e);
54142 }
54143
54144 #[simd_test(enable = "avx512f,avx512vl")]
54145 unsafe fn test_mm256_maskz_shuffle_i32x4() {
54146 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54147 let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54148 let r = _mm256_maskz_shuffle_i32x4::<0b00>(0, a, b);
54149 assert_eq_m256i(r, _mm256_setzero_si256());
54150 let r = _mm256_maskz_shuffle_i32x4::<0b00>(0b11111111, a, b);
54151 let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54152 assert_eq_m256i(r, e);
54153 }
54154
54155 #[simd_test(enable = "avx512f")]
54156 unsafe fn test_mm512_shuffle_f32x4() {
54157 let a = _mm512_setr_ps(
54158 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54159 );
54160 let b = _mm512_setr_ps(
54161 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54162 );
54163 let r = _mm512_shuffle_f32x4::<0b00_00_00_00>(a, b);
54164 let e = _mm512_setr_ps(
54165 1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
54166 );
54167 assert_eq_m512(r, e);
54168 }
54169
54170 #[simd_test(enable = "avx512f")]
54171 unsafe fn test_mm512_mask_shuffle_f32x4() {
54172 let a = _mm512_setr_ps(
54173 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54174 );
54175 let b = _mm512_setr_ps(
54176 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54177 );
54178 let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0, a, b);
54179 assert_eq_m512(r, a);
54180 let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
54181 let e = _mm512_setr_ps(
54182 1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
54183 );
54184 assert_eq_m512(r, e);
54185 }
54186
54187 #[simd_test(enable = "avx512f")]
54188 unsafe fn test_mm512_maskz_shuffle_f32x4() {
54189 let a = _mm512_setr_ps(
54190 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54191 );
54192 let b = _mm512_setr_ps(
54193 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54194 );
54195 let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0, a, b);
54196 assert_eq_m512(r, _mm512_setzero_ps());
54197 let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
54198 let e = _mm512_setr_ps(
54199 1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54200 );
54201 assert_eq_m512(r, e);
54202 }
54203
54204 #[simd_test(enable = "avx512f,avx512vl")]
54205 unsafe fn test_mm256_shuffle_f32x4() {
54206 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54207 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54208 let r = _mm256_shuffle_f32x4::<0b00>(a, b);
54209 let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54210 assert_eq_m256(r, e);
54211 }
54212
54213 #[simd_test(enable = "avx512f,avx512vl")]
54214 unsafe fn test_mm256_mask_shuffle_f32x4() {
54215 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54216 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54217 let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0, a, b);
54218 assert_eq_m256(r, a);
54219 let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0b11111111, a, b);
54220 let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54221 assert_eq_m256(r, e);
54222 }
54223
54224 #[simd_test(enable = "avx512f,avx512vl")]
54225 unsafe fn test_mm256_maskz_shuffle_f32x4() {
54226 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54227 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54228 let r = _mm256_maskz_shuffle_f32x4::<0b00>(0, a, b);
54229 assert_eq_m256(r, _mm256_setzero_ps());
54230 let r = _mm256_maskz_shuffle_f32x4::<0b00>(0b11111111, a, b);
54231 let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54232 assert_eq_m256(r, e);
54233 }
54234
54235 #[simd_test(enable = "avx512f")]
54236 unsafe fn test_mm512_extractf32x4_ps() {
54237 let a = _mm512_setr_ps(
54238 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54239 );
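        // Index 1 extracts the second 128-bit lane, i.e. elements 4..=7.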
54240 let r = _mm512_extractf32x4_ps::<1>(a);
54241 let e = _mm_setr_ps(5., 6., 7., 8.);
54242 assert_eq_m128(r, e);
54243 }
54244
54245 #[simd_test(enable = "avx512f")]
54246 unsafe fn test_mm512_mask_extractf32x4_ps() {
54247 let a = _mm512_setr_ps(
54248 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54249 );
54250 let src = _mm_set1_ps(100.);
54251 let r = _mm512_mask_extractf32x4_ps::<1>(src, 0, a);
54252 assert_eq_m128(r, src);
54253 let r = _mm512_mask_extractf32x4_ps::<1>(src, 0b11111111, a);
54254 let e = _mm_setr_ps(5., 6., 7., 8.);
54255 assert_eq_m128(r, e);
54256 }
54257
54258 #[simd_test(enable = "avx512f")]
54259 unsafe fn test_mm512_maskz_extractf32x4_ps() {
54260 let a = _mm512_setr_ps(
54261 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54262 );
54263 let r = _mm512_maskz_extractf32x4_ps::<1>(0, a);
54264 assert_eq_m128(r, _mm_setzero_ps());
54265 let r = _mm512_maskz_extractf32x4_ps::<1>(0b00000001, a);
54266 let e = _mm_setr_ps(5., 0., 0., 0.);
54267 assert_eq_m128(r, e);
54268 }
54269
54270 #[simd_test(enable = "avx512f,avx512vl")]
54271 unsafe fn test_mm256_extractf32x4_ps() {
54272 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54273 let r = _mm256_extractf32x4_ps::<1>(a);
54274 let e = _mm_set_ps(1., 2., 3., 4.);
54275 assert_eq_m128(r, e);
54276 }
54277
54278 #[simd_test(enable = "avx512f,avx512vl")]
54279 unsafe fn test_mm256_mask_extractf32x4_ps() {
54280 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54281 let src = _mm_set1_ps(100.);
54282 let r = _mm256_mask_extractf32x4_ps::<1>(src, 0, a);
54283 assert_eq_m128(r, src);
54284 let r = _mm256_mask_extractf32x4_ps::<1>(src, 0b00001111, a);
54285 let e = _mm_set_ps(1., 2., 3., 4.);
54286 assert_eq_m128(r, e);
54287 }
54288
54289 #[simd_test(enable = "avx512f,avx512vl")]
54290 unsafe fn test_mm256_maskz_extractf32x4_ps() {
54291 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54292 let r = _mm256_maskz_extractf32x4_ps::<1>(0, a);
54293 assert_eq_m128(r, _mm_setzero_ps());
54294 let r = _mm256_maskz_extractf32x4_ps::<1>(0b00001111, a);
54295 let e = _mm_set_ps(1., 2., 3., 4.);
54296 assert_eq_m128(r, e);
54297 }
54298
54299 #[simd_test(enable = "avx512f")]
54300 unsafe fn test_mm512_extracti32x4_epi32() {
54301 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54302 let r = _mm512_extracti32x4_epi32::<1>(a);
54303 let e = _mm_setr_epi32(5, 6, 7, 8);
54304 assert_eq_m128i(r, e);
54305 }
54306
54307 #[simd_test(enable = "avx512f")]
54308 unsafe fn test_mm512_mask_extracti32x4_epi32() {
54309 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54310 let src = _mm_set1_epi32(100);
54311 let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0, a);
54312 assert_eq_m128i(r, src);
54313 let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0b11111111, a);
54314 let e = _mm_setr_epi32(5, 6, 7, 8);
54315 assert_eq_m128i(r, e);
54316 }
54317
    #[simd_test(enable = "avx512f")]

54319 unsafe fn test_mm512_maskz_extracti32x4_epi32() {
54320 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54321 let r = _mm512_maskz_extracti32x4_epi32::<1>(0, a);
54322 assert_eq_m128i(r, _mm_setzero_si128());
54323 let r = _mm512_maskz_extracti32x4_epi32::<1>(0b00000001, a);
54324 let e = _mm_setr_epi32(5, 0, 0, 0);
54325 assert_eq_m128i(r, e);
54326 }
54327
54328 #[simd_test(enable = "avx512f,avx512vl")]
54329 unsafe fn test_mm256_extracti32x4_epi32() {
54330 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54331 let r = _mm256_extracti32x4_epi32::<1>(a);
54332 let e = _mm_set_epi32(1, 2, 3, 4);
54333 assert_eq_m128i(r, e);
54334 }
54335
54336 #[simd_test(enable = "avx512f,avx512vl")]
54337 unsafe fn test_mm256_mask_extracti32x4_epi32() {
54338 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54339 let src = _mm_set1_epi32(100);
54340 let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0, a);
54341 assert_eq_m128i(r, src);
54342 let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0b00001111, a);
54343 let e = _mm_set_epi32(1, 2, 3, 4);
54344 assert_eq_m128i(r, e);
54345 }
54346
54347 #[simd_test(enable = "avx512f,avx512vl")]
54348 unsafe fn test_mm256_maskz_extracti32x4_epi32() {
54349 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54350 let r = _mm256_maskz_extracti32x4_epi32::<1>(0, a);
54351 assert_eq_m128i(r, _mm_setzero_si128());
54352 let r = _mm256_maskz_extracti32x4_epi32::<1>(0b00001111, a);
54353 let e = _mm_set_epi32(1, 2, 3, 4);
54354 assert_eq_m128i(r, e);
54355 }
54356
54357 #[simd_test(enable = "avx512f")]
54358 unsafe fn test_mm512_moveldup_ps() {
54359 let a = _mm512_setr_ps(
54360 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54361 );
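        // Duplicates the even-indexed (lower) element of each pair of lanes.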
54362 let r = _mm512_moveldup_ps(a);
54363 let e = _mm512_setr_ps(
54364 1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
54365 );
54366 assert_eq_m512(r, e);
54367 }
54368
54369 #[simd_test(enable = "avx512f")]
54370 unsafe fn test_mm512_mask_moveldup_ps() {
54371 let a = _mm512_setr_ps(
54372 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54373 );
54374 let r = _mm512_mask_moveldup_ps(a, 0, a);
54375 assert_eq_m512(r, a);
54376 let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a);
54377 let e = _mm512_setr_ps(
54378 1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
54379 );
54380 assert_eq_m512(r, e);
54381 }
54382
54383 #[simd_test(enable = "avx512f")]
54384 unsafe fn test_mm512_maskz_moveldup_ps() {
54385 let a = _mm512_setr_ps(
54386 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54387 );
54388 let r = _mm512_maskz_moveldup_ps(0, a);
54389 assert_eq_m512(r, _mm512_setzero_ps());
54390 let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a);
54391 let e = _mm512_setr_ps(
54392 1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
54393 );
54394 assert_eq_m512(r, e);
54395 }
54396
54397 #[simd_test(enable = "avx512f,avx512vl")]
54398 unsafe fn test_mm256_mask_moveldup_ps() {
54399 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54400 let r = _mm256_mask_moveldup_ps(a, 0, a);
54401 assert_eq_m256(r, a);
54402 let r = _mm256_mask_moveldup_ps(a, 0b11111111, a);
54403 let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
54404 assert_eq_m256(r, e);
54405 }
54406
54407 #[simd_test(enable = "avx512f,avx512vl")]
54408 unsafe fn test_mm256_maskz_moveldup_ps() {
54409 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54410 let r = _mm256_maskz_moveldup_ps(0, a);
54411 assert_eq_m256(r, _mm256_setzero_ps());
54412 let r = _mm256_maskz_moveldup_ps(0b11111111, a);
54413 let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
54414 assert_eq_m256(r, e);
54415 }
54416
54417 #[simd_test(enable = "avx512f,avx512vl")]
54418 unsafe fn test_mm_mask_moveldup_ps() {
54419 let a = _mm_set_ps(1., 2., 3., 4.);
54420 let r = _mm_mask_moveldup_ps(a, 0, a);
54421 assert_eq_m128(r, a);
54422 let r = _mm_mask_moveldup_ps(a, 0b00001111, a);
54423 let e = _mm_set_ps(2., 2., 4., 4.);
54424 assert_eq_m128(r, e);
54425 }
54426
54427 #[simd_test(enable = "avx512f,avx512vl")]
54428 unsafe fn test_mm_maskz_moveldup_ps() {
54429 let a = _mm_set_ps(1., 2., 3., 4.);
54430 let r = _mm_maskz_moveldup_ps(0, a);
54431 assert_eq_m128(r, _mm_setzero_ps());
54432 let r = _mm_maskz_moveldup_ps(0b00001111, a);
54433 let e = _mm_set_ps(2., 2., 4., 4.);
54434 assert_eq_m128(r, e);
54435 }
54436
54437 #[simd_test(enable = "avx512f")]
54438 unsafe fn test_mm512_movehdup_ps() {
54439 let a = _mm512_setr_ps(
54440 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54441 );
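        // Duplicates the odd-indexed (upper) element of each pair of lanes.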
54442 let r = _mm512_movehdup_ps(a);
54443 let e = _mm512_setr_ps(
54444 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
54445 );
54446 assert_eq_m512(r, e);
54447 }
54448
54449 #[simd_test(enable = "avx512f")]
54450 unsafe fn test_mm512_mask_movehdup_ps() {
54451 let a = _mm512_setr_ps(
54452 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54453 );
54454 let r = _mm512_mask_movehdup_ps(a, 0, a);
54455 assert_eq_m512(r, a);
54456 let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a);
54457 let e = _mm512_setr_ps(
54458 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
54459 );
54460 assert_eq_m512(r, e);
54461 }
54462
54463 #[simd_test(enable = "avx512f")]
54464 unsafe fn test_mm512_maskz_movehdup_ps() {
54465 let a = _mm512_setr_ps(
54466 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54467 );
54468 let r = _mm512_maskz_movehdup_ps(0, a);
54469 assert_eq_m512(r, _mm512_setzero_ps());
54470 let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a);
54471 let e = _mm512_setr_ps(
54472 2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54473 );
54474 assert_eq_m512(r, e);
54475 }
54476
54477 #[simd_test(enable = "avx512f,avx512vl")]
54478 unsafe fn test_mm256_mask_movehdup_ps() {
54479 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54480 let r = _mm256_mask_movehdup_ps(a, 0, a);
54481 assert_eq_m256(r, a);
54482 let r = _mm256_mask_movehdup_ps(a, 0b11111111, a);
54483 let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
54484 assert_eq_m256(r, e);
54485 }
54486
54487 #[simd_test(enable = "avx512f,avx512vl")]
54488 unsafe fn test_mm256_maskz_movehdup_ps() {
54489 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54490 let r = _mm256_maskz_movehdup_ps(0, a);
54491 assert_eq_m256(r, _mm256_setzero_ps());
54492 let r = _mm256_maskz_movehdup_ps(0b11111111, a);
54493 let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
54494 assert_eq_m256(r, e);
54495 }
54496
54497 #[simd_test(enable = "avx512f,avx512vl")]
54498 unsafe fn test_mm_mask_movehdup_ps() {
54499 let a = _mm_set_ps(1., 2., 3., 4.);
54500 let r = _mm_mask_movehdup_ps(a, 0, a);
54501 assert_eq_m128(r, a);
54502 let r = _mm_mask_movehdup_ps(a, 0b00001111, a);
54503 let e = _mm_set_ps(1., 1., 3., 3.);
54504 assert_eq_m128(r, e);
54505 }
54506
54507 #[simd_test(enable = "avx512f,avx512vl")]
54508 unsafe fn test_mm_maskz_movehdup_ps() {
54509 let a = _mm_set_ps(1., 2., 3., 4.);
54510 let r = _mm_maskz_movehdup_ps(0, a);
54511 assert_eq_m128(r, _mm_setzero_ps());
54512 let r = _mm_maskz_movehdup_ps(0b00001111, a);
54513 let e = _mm_set_ps(1., 1., 3., 3.);
54514 assert_eq_m128(r, e);
54515 }
54516
54517 #[simd_test(enable = "avx512f")]
54518 unsafe fn test_mm512_inserti32x4() {
54519 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54520 let b = _mm_setr_epi32(17, 18, 19, 20);
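        // Index 0 replaces the lowest 128-bit lane (elements 0..=3) of `a` with `b`.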
54521 let r = _mm512_inserti32x4::<0>(a, b);
54522 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54523 assert_eq_m512i(r, e);
54524 }
54525
54526 #[simd_test(enable = "avx512f")]
54527 unsafe fn test_mm512_mask_inserti32x4() {
54528 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54529 let b = _mm_setr_epi32(17, 18, 19, 20);
54530 let r = _mm512_mask_inserti32x4::<0>(a, 0, a, b);
54531 assert_eq_m512i(r, a);
54532 let r = _mm512_mask_inserti32x4::<0>(a, 0b11111111_11111111, a, b);
54533 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54534 assert_eq_m512i(r, e);
54535 }
54536
54537 #[simd_test(enable = "avx512f")]
54538 unsafe fn test_mm512_maskz_inserti32x4() {
54539 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54540 let b = _mm_setr_epi32(17, 18, 19, 20);
54541 let r = _mm512_maskz_inserti32x4::<0>(0, a, b);
54542 assert_eq_m512i(r, _mm512_setzero_si512());
54543 let r = _mm512_maskz_inserti32x4::<0>(0b00000000_11111111, a, b);
54544 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
54545 assert_eq_m512i(r, e);
54546 }
54547
54548 #[simd_test(enable = "avx512f,avx512vl")]
54549 unsafe fn test_mm256_inserti32x4() {
54550 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54551 let b = _mm_set_epi32(17, 18, 19, 20);
54552 let r = _mm256_inserti32x4::<1>(a, b);
54553 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54554 assert_eq_m256i(r, e);
54555 }
54556
54557 #[simd_test(enable = "avx512f,avx512vl")]
54558 unsafe fn test_mm256_mask_inserti32x4() {
54559 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54560 let b = _mm_set_epi32(17, 18, 19, 20);
54561 let r = _mm256_mask_inserti32x4::<0>(a, 0, a, b);
54562 assert_eq_m256i(r, a);
54563 let r = _mm256_mask_inserti32x4::<1>(a, 0b11111111, a, b);
54564 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54565 assert_eq_m256i(r, e);
54566 }
54567
54568 #[simd_test(enable = "avx512f,avx512vl")]
54569 unsafe fn test_mm256_maskz_inserti32x4() {
54570 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54571 let b = _mm_set_epi32(17, 18, 19, 20);
54572 let r = _mm256_maskz_inserti32x4::<0>(0, a, b);
54573 assert_eq_m256i(r, _mm256_setzero_si256());
54574 let r = _mm256_maskz_inserti32x4::<1>(0b11111111, a, b);
54575 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54576 assert_eq_m256i(r, e);
54577 }
54578
54579 #[simd_test(enable = "avx512f")]
54580 unsafe fn test_mm512_insertf32x4() {
54581 let a = _mm512_setr_ps(
54582 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54583 );
54584 let b = _mm_setr_ps(17., 18., 19., 20.);
54585 let r = _mm512_insertf32x4::<0>(a, b);
54586 let e = _mm512_setr_ps(
54587 17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54588 );
54589 assert_eq_m512(r, e);
54590 }
54591
54592 #[simd_test(enable = "avx512f")]
54593 unsafe fn test_mm512_mask_insertf32x4() {
54594 let a = _mm512_setr_ps(
54595 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54596 );
54597 let b = _mm_setr_ps(17., 18., 19., 20.);
54598 let r = _mm512_mask_insertf32x4::<0>(a, 0, a, b);
54599 assert_eq_m512(r, a);
54600 let r = _mm512_mask_insertf32x4::<0>(a, 0b11111111_11111111, a, b);
54601 let e = _mm512_setr_ps(
54602 17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54603 );
54604 assert_eq_m512(r, e);
54605 }
54606
54607 #[simd_test(enable = "avx512f")]
54608 unsafe fn test_mm512_maskz_insertf32x4() {
54609 let a = _mm512_setr_ps(
54610 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54611 );
54612 let b = _mm_setr_ps(17., 18., 19., 20.);
54613 let r = _mm512_maskz_insertf32x4::<0>(0, a, b);
54614 assert_eq_m512(r, _mm512_setzero_ps());
54615 let r = _mm512_maskz_insertf32x4::<0>(0b00000000_11111111, a, b);
54616 let e = _mm512_setr_ps(
54617 17., 18., 19., 20., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54618 );
54619 assert_eq_m512(r, e);
54620 }
54621
54622 #[simd_test(enable = "avx512f,avx512vl")]
54623 unsafe fn test_mm256_insertf32x4() {
54624 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54625 let b = _mm_set_ps(17., 18., 19., 20.);
54626 let r = _mm256_insertf32x4::<1>(a, b);
54627 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54628 assert_eq_m256(r, e);
54629 }
54630
54631 #[simd_test(enable = "avx512f,avx512vl")]
54632 unsafe fn test_mm256_mask_insertf32x4() {
54633 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54634 let b = _mm_set_ps(17., 18., 19., 20.);
54635 let r = _mm256_mask_insertf32x4::<0>(a, 0, a, b);
54636 assert_eq_m256(r, a);
54637 let r = _mm256_mask_insertf32x4::<1>(a, 0b11111111, a, b);
54638 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54639 assert_eq_m256(r, e);
54640 }
54641
54642 #[simd_test(enable = "avx512f,avx512vl")]
54643 unsafe fn test_mm256_maskz_insertf32x4() {
54644 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54645 let b = _mm_set_ps(17., 18., 19., 20.);
54646 let r = _mm256_maskz_insertf32x4::<0>(0, a, b);
54647 assert_eq_m256(r, _mm256_setzero_ps());
54648 let r = _mm256_maskz_insertf32x4::<1>(0b11111111, a, b);
54649 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54650 assert_eq_m256(r, e);
54651 }
54652
54653 #[simd_test(enable = "avx512f")]
54654 unsafe fn test_mm512_castps128_ps512() {
54655 let a = _mm_setr_ps(17., 18., 19., 20.);
54656 let r = _mm512_castps128_ps512(a);
54657 assert_eq_m128(_mm512_castps512_ps128(r), a);
54658 }
54659
54660 #[simd_test(enable = "avx512f")]
54661 unsafe fn test_mm512_castps256_ps512() {
54662 let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54663 let r = _mm512_castps256_ps512(a);
54664 assert_eq_m256(_mm512_castps512_ps256(r), a);
54665 }
54666
54667 #[simd_test(enable = "avx512f")]
54668 unsafe fn test_mm512_zextps128_ps512() {
54669 let a = _mm_setr_ps(17., 18., 19., 20.);
54670 let r = _mm512_zextps128_ps512(a);
54671 let e = _mm512_setr_ps(
54672 17., 18., 19., 20., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
54673 );
54674 assert_eq_m512(r, e);
54675 }
54676
54677 #[simd_test(enable = "avx512f")]
54678 unsafe fn test_mm512_zextps256_ps512() {
54679 let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54680 let r = _mm512_zextps256_ps512(a);
54681 let e = _mm512_setr_ps(
54682 17., 18., 19., 20., 21., 22., 23., 24., 0., 0., 0., 0., 0., 0., 0., 0.,
54683 );
54684 assert_eq_m512(r, e);
54685 }
54686
54687 #[simd_test(enable = "avx512f")]
54688 unsafe fn test_mm512_castps512_ps128() {
54689 let a = _mm512_setr_ps(
54690 17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
54691 );
54692 let r = _mm512_castps512_ps128(a);
54693 let e = _mm_setr_ps(17., 18., 19., 20.);
54694 assert_eq_m128(r, e);
54695 }
54696
54697 #[simd_test(enable = "avx512f")]
54698 unsafe fn test_mm512_castps512_ps256() {
54699 let a = _mm512_setr_ps(
54700 17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
54701 );
54702 let r = _mm512_castps512_ps256(a);
54703 let e = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54704 assert_eq_m256(r, e);
54705 }
54706
54707 #[simd_test(enable = "avx512f")]
54708 unsafe fn test_mm512_castps_pd() {
54709 let a = _mm512_set1_ps(1.);
54710 let r = _mm512_castps_pd(a);
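        // Each f64 lane holds two 1.0f32 bit patterns (0x3F80_0000), which reinterpret
        // to roughly 0.0078125 as a double.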
54711 let e = _mm512_set1_pd(0.007812501848093234);
54712 assert_eq_m512d(r, e);
54713 }
54714
54715 #[simd_test(enable = "avx512f")]
54716 unsafe fn test_mm512_castps_si512() {
54717 let a = _mm512_set1_ps(1.);
54718 let r = _mm512_castps_si512(a);
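        // 1065353216 == 0x3F80_0000, the IEEE-754 bit pattern of 1.0f32.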
54719 let e = _mm512_set1_epi32(1065353216);
54720 assert_eq_m512i(r, e);
54721 }
54722
54723 #[simd_test(enable = "avx512f")]
54724 unsafe fn test_mm512_broadcastd_epi32() {
54725 let a = _mm_set_epi32(17, 18, 19, 20);
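        // `_mm_set_epi32` lists elements from high to low, so the lowest element
        // broadcast here is 20.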
54726 let r = _mm512_broadcastd_epi32(a);
54727 let e = _mm512_set1_epi32(20);
54728 assert_eq_m512i(r, e);
54729 }
54730
54731 #[simd_test(enable = "avx512f")]
54732 unsafe fn test_mm512_mask_broadcastd_epi32() {
54733 let src = _mm512_set1_epi32(20);
54734 let a = _mm_set_epi32(17, 18, 19, 20);
54735 let r = _mm512_mask_broadcastd_epi32(src, 0, a);
54736 assert_eq_m512i(r, src);
54737 let r = _mm512_mask_broadcastd_epi32(src, 0b11111111_11111111, a);
54738 let e = _mm512_set1_epi32(20);
54739 assert_eq_m512i(r, e);
54740 }
54741
54742 #[simd_test(enable = "avx512f")]
54743 unsafe fn test_mm512_maskz_broadcastd_epi32() {
54744 let a = _mm_set_epi32(17, 18, 19, 20);
54745 let r = _mm512_maskz_broadcastd_epi32(0, a);
54746 assert_eq_m512i(r, _mm512_setzero_si512());
54747 let r = _mm512_maskz_broadcastd_epi32(0b00000000_11111111, a);
54748 let e = _mm512_setr_epi32(20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0);
54749 assert_eq_m512i(r, e);
54750 }
54751
54752 #[simd_test(enable = "avx512f,avx512vl")]
54753 unsafe fn test_mm256_mask_broadcastd_epi32() {
54754 let src = _mm256_set1_epi32(20);
54755 let a = _mm_set_epi32(17, 18, 19, 20);
54756 let r = _mm256_mask_broadcastd_epi32(src, 0, a);
54757 assert_eq_m256i(r, src);
54758 let r = _mm256_mask_broadcastd_epi32(src, 0b11111111, a);
54759 let e = _mm256_set1_epi32(20);
54760 assert_eq_m256i(r, e);
54761 }
54762
54763 #[simd_test(enable = "avx512f,avx512vl")]
54764 unsafe fn test_mm256_maskz_broadcastd_epi32() {
54765 let a = _mm_set_epi32(17, 18, 19, 20);
54766 let r = _mm256_maskz_broadcastd_epi32(0, a);
54767 assert_eq_m256i(r, _mm256_setzero_si256());
54768 let r = _mm256_maskz_broadcastd_epi32(0b11111111, a);
54769 let e = _mm256_set1_epi32(20);
54770 assert_eq_m256i(r, e);
54771 }
54772
54773 #[simd_test(enable = "avx512f,avx512vl")]
54774 unsafe fn test_mm_mask_broadcastd_epi32() {
54775 let src = _mm_set1_epi32(20);
54776 let a = _mm_set_epi32(17, 18, 19, 20);
54777 let r = _mm_mask_broadcastd_epi32(src, 0, a);
54778 assert_eq_m128i(r, src);
54779 let r = _mm_mask_broadcastd_epi32(src, 0b00001111, a);
54780 let e = _mm_set1_epi32(20);
54781 assert_eq_m128i(r, e);
54782 }
54783
54784 #[simd_test(enable = "avx512f,avx512vl")]
54785 unsafe fn test_mm_maskz_broadcastd_epi32() {
54786 let a = _mm_set_epi32(17, 18, 19, 20);
54787 let r = _mm_maskz_broadcastd_epi32(0, a);
54788 assert_eq_m128i(r, _mm_setzero_si128());
54789 let r = _mm_maskz_broadcastd_epi32(0b00001111, a);
54790 let e = _mm_set1_epi32(20);
54791 assert_eq_m128i(r, e);
54792 }
54793
54794 #[simd_test(enable = "avx512f")]
54795 unsafe fn test_mm512_broadcastss_ps() {
54796 let a = _mm_set_ps(17., 18., 19., 20.);
54797 let r = _mm512_broadcastss_ps(a);
54798 let e = _mm512_set1_ps(20.);
54799 assert_eq_m512(r, e);
54800 }
54801
54802 #[simd_test(enable = "avx512f")]
54803 unsafe fn test_mm512_mask_broadcastss_ps() {
54804 let src = _mm512_set1_ps(20.);
54805 let a = _mm_set_ps(17., 18., 19., 20.);
54806 let r = _mm512_mask_broadcastss_ps(src, 0, a);
54807 assert_eq_m512(r, src);
54808 let r = _mm512_mask_broadcastss_ps(src, 0b11111111_11111111, a);
54809 let e = _mm512_set1_ps(20.);
54810 assert_eq_m512(r, e);
54811 }
54812
54813 #[simd_test(enable = "avx512f")]
54814 unsafe fn test_mm512_maskz_broadcastss_ps() {
54815 let a = _mm_set_ps(17., 18., 19., 20.);
54816 let r = _mm512_maskz_broadcastss_ps(0, a);
54817 assert_eq_m512(r, _mm512_setzero_ps());
54818 let r = _mm512_maskz_broadcastss_ps(0b00000000_11111111, a);
54819 let e = _mm512_setr_ps(
54820 20., 20., 20., 20., 20., 20., 20., 20., 0., 0., 0., 0., 0., 0., 0., 0.,
54821 );
54822 assert_eq_m512(r, e);
54823 }
54824
54825 #[simd_test(enable = "avx512f,avx512vl")]
54826 unsafe fn test_mm256_mask_broadcastss_ps() {
54827 let src = _mm256_set1_ps(20.);
54828 let a = _mm_set_ps(17., 18., 19., 20.);
54829 let r = _mm256_mask_broadcastss_ps(src, 0, a);
54830 assert_eq_m256(r, src);
54831 let r = _mm256_mask_broadcastss_ps(src, 0b11111111, a);
54832 let e = _mm256_set1_ps(20.);
54833 assert_eq_m256(r, e);
54834 }
54835
54836 #[simd_test(enable = "avx512f,avx512vl")]
54837 unsafe fn test_mm256_maskz_broadcastss_ps() {
54838 let a = _mm_set_ps(17., 18., 19., 20.);
54839 let r = _mm256_maskz_broadcastss_ps(0, a);
54840 assert_eq_m256(r, _mm256_setzero_ps());
54841 let r = _mm256_maskz_broadcastss_ps(0b11111111, a);
54842 let e = _mm256_set1_ps(20.);
54843 assert_eq_m256(r, e);
54844 }
54845
54846 #[simd_test(enable = "avx512f,avx512vl")]
54847 unsafe fn test_mm_mask_broadcastss_ps() {
54848 let src = _mm_set1_ps(20.);
54849 let a = _mm_set_ps(17., 18., 19., 20.);
54850 let r = _mm_mask_broadcastss_ps(src, 0, a);
54851 assert_eq_m128(r, src);
54852 let r = _mm_mask_broadcastss_ps(src, 0b00001111, a);
54853 let e = _mm_set1_ps(20.);
54854 assert_eq_m128(r, e);
54855 }
54856
54857 #[simd_test(enable = "avx512f,avx512vl")]
54858 unsafe fn test_mm_maskz_broadcastss_ps() {
54859 let a = _mm_set_ps(17., 18., 19., 20.);
54860 let r = _mm_maskz_broadcastss_ps(0, a);
54861 assert_eq_m128(r, _mm_setzero_ps());
54862 let r = _mm_maskz_broadcastss_ps(0b00001111, a);
54863 let e = _mm_set1_ps(20.);
54864 assert_eq_m128(r, e);
54865 }
54866
54867 #[simd_test(enable = "avx512f")]
54868 unsafe fn test_mm512_broadcast_i32x4() {
54869 let a = _mm_set_epi32(17, 18, 19, 20);
54870 let r = _mm512_broadcast_i32x4(a);
54871 let e = _mm512_set_epi32(
54872 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
54873 );
54874 assert_eq_m512i(r, e);
54875 }
54876
54877 #[simd_test(enable = "avx512f")]
54878 unsafe fn test_mm512_mask_broadcast_i32x4() {
54879 let src = _mm512_set1_epi32(20);
54880 let a = _mm_set_epi32(17, 18, 19, 20);
54881 let r = _mm512_mask_broadcast_i32x4(src, 0, a);
54882 assert_eq_m512i(r, src);
54883 let r = _mm512_mask_broadcast_i32x4(src, 0b11111111_11111111, a);
54884 let e = _mm512_set_epi32(
54885 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
54886 );
54887 assert_eq_m512i(r, e);
54888 }
54889
54890 #[simd_test(enable = "avx512f")]
54891 unsafe fn test_mm512_maskz_broadcast_i32x4() {
54892 let a = _mm_set_epi32(17, 18, 19, 20);
54893 let r = _mm512_maskz_broadcast_i32x4(0, a);
54894 assert_eq_m512i(r, _mm512_setzero_si512());
54895 let r = _mm512_maskz_broadcast_i32x4(0b00000000_11111111, a);
54896 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, 20, 17, 18, 19, 20);
54897 assert_eq_m512i(r, e);
54898 }
54899
54900 #[simd_test(enable = "avx512f,avx512vl")]
54901 unsafe fn test_mm256_broadcast_i32x4() {
54902 let a = _mm_set_epi32(17, 18, 19, 20);
54903 let r = _mm256_broadcast_i32x4(a);
54904 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54905 assert_eq_m256i(r, e);
54906 }
54907
54908 #[simd_test(enable = "avx512f,avx512vl")]
54909 unsafe fn test_mm256_mask_broadcast_i32x4() {
54910 let src = _mm256_set1_epi32(20);
54911 let a = _mm_set_epi32(17, 18, 19, 20);
54912 let r = _mm256_mask_broadcast_i32x4(src, 0, a);
54913 assert_eq_m256i(r, src);
54914 let r = _mm256_mask_broadcast_i32x4(src, 0b11111111, a);
54915 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54916 assert_eq_m256i(r, e);
54917 }
54918
54919 #[simd_test(enable = "avx512f,avx512vl")]
54920 unsafe fn test_mm256_maskz_broadcast_i32x4() {
54921 let a = _mm_set_epi32(17, 18, 19, 20);
54922 let r = _mm256_maskz_broadcast_i32x4(0, a);
54923 assert_eq_m256i(r, _mm256_setzero_si256());
54924 let r = _mm256_maskz_broadcast_i32x4(0b11111111, a);
54925 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54926 assert_eq_m256i(r, e);
54927 }
54928
54929 #[simd_test(enable = "avx512f")]
54930 unsafe fn test_mm512_broadcast_f32x4() {
54931 let a = _mm_set_ps(17., 18., 19., 20.);
54932 let r = _mm512_broadcast_f32x4(a);
54933 let e = _mm512_set_ps(
54934 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
54935 );
54936 assert_eq_m512(r, e);
54937 }
54938
54939 #[simd_test(enable = "avx512f")]
54940 unsafe fn test_mm512_mask_broadcast_f32x4() {
54941 let src = _mm512_set1_ps(20.);
54942 let a = _mm_set_ps(17., 18., 19., 20.);
54943 let r = _mm512_mask_broadcast_f32x4(src, 0, a);
54944 assert_eq_m512(r, src);
54945 let r = _mm512_mask_broadcast_f32x4(src, 0b11111111_11111111, a);
54946 let e = _mm512_set_ps(
54947 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
54948 );
54949 assert_eq_m512(r, e);
54950 }
54951
54952 #[simd_test(enable = "avx512f")]
54953 unsafe fn test_mm512_maskz_broadcast_f32x4() {
54954 let a = _mm_set_ps(17., 18., 19., 20.);
54955 let r = _mm512_maskz_broadcast_f32x4(0, a);
54956 assert_eq_m512(r, _mm512_setzero_ps());
54957 let r = _mm512_maskz_broadcast_f32x4(0b00000000_11111111, a);
54958 let e = _mm512_set_ps(
54959 0., 0., 0., 0., 0., 0., 0., 0., 17., 18., 19., 20., 17., 18., 19., 20.,
54960 );
54961 assert_eq_m512(r, e);
54962 }
54963
54964 #[simd_test(enable = "avx512f,avx512vl")]
54965 unsafe fn test_mm256_broadcast_f32x4() {
54966 let a = _mm_set_ps(17., 18., 19., 20.);
54967 let r = _mm256_broadcast_f32x4(a);
54968 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54969 assert_eq_m256(r, e);
54970 }
54971
54972 #[simd_test(enable = "avx512f,avx512vl")]
54973 unsafe fn test_mm256_mask_broadcast_f32x4() {
54974 let src = _mm256_set1_ps(20.);
54975 let a = _mm_set_ps(17., 18., 19., 20.);
54976 let r = _mm256_mask_broadcast_f32x4(src, 0, a);
54977 assert_eq_m256(r, src);
54978 let r = _mm256_mask_broadcast_f32x4(src, 0b11111111, a);
54979 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54980 assert_eq_m256(r, e);
54981 }
54982
54983 #[simd_test(enable = "avx512f,avx512vl")]
54984 unsafe fn test_mm256_maskz_broadcast_f32x4() {
54985 let a = _mm_set_ps(17., 18., 19., 20.);
54986 let r = _mm256_maskz_broadcast_f32x4(0, a);
54987 assert_eq_m256(r, _mm256_setzero_ps());
54988 let r = _mm256_maskz_broadcast_f32x4(0b11111111, a);
54989 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54990 assert_eq_m256(r, e);
54991 }
54992
54993 #[simd_test(enable = "avx512f")]
54994 unsafe fn test_mm512_mask_blend_epi32() {
54995 let a = _mm512_set1_epi32(1);
54996 let b = _mm512_set1_epi32(2);
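        // A set mask bit selects the element from `b`; a clear bit keeps the element from `a`.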
54997 let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b);
54998 let e = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
54999 assert_eq_m512i(r, e);
55000 }
55001
55002 #[simd_test(enable = "avx512f,avx512vl")]
55003 unsafe fn test_mm256_mask_blend_epi32() {
55004 let a = _mm256_set1_epi32(1);
55005 let b = _mm256_set1_epi32(2);
55006 let r = _mm256_mask_blend_epi32(0b11111111, a, b);
55007 let e = _mm256_set1_epi32(2);
55008 assert_eq_m256i(r, e);
55009 }
55010
55011 #[simd_test(enable = "avx512f,avx512vl")]
55012 unsafe fn test_mm_mask_blend_epi32() {
55013 let a = _mm_set1_epi32(1);
55014 let b = _mm_set1_epi32(2);
55015 let r = _mm_mask_blend_epi32(0b00001111, a, b);
55016 let e = _mm_set1_epi32(2);
55017 assert_eq_m128i(r, e);
55018 }
55019
55020 #[simd_test(enable = "avx512f")]
55021 unsafe fn test_mm512_mask_blend_ps() {
55022 let a = _mm512_set1_ps(1.);
55023 let b = _mm512_set1_ps(2.);
55024 let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b);
55025 let e = _mm512_set_ps(
55026 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
55027 );
55028 assert_eq_m512(r, e);
55029 }
55030
55031 #[simd_test(enable = "avx512f,avx512vl")]
55032 unsafe fn test_mm256_mask_blend_ps() {
55033 let a = _mm256_set1_ps(1.);
55034 let b = _mm256_set1_ps(2.);
55035 let r = _mm256_mask_blend_ps(0b11111111, a, b);
55036 let e = _mm256_set1_ps(2.);
55037 assert_eq_m256(r, e);
55038 }
55039
55040 #[simd_test(enable = "avx512f,avx512vl")]
55041 unsafe fn test_mm_mask_blend_ps() {
55042 let a = _mm_set1_ps(1.);
55043 let b = _mm_set1_ps(2.);
55044 let r = _mm_mask_blend_ps(0b00001111, a, b);
55045 let e = _mm_set1_ps(2.);
55046 assert_eq_m128(r, e);
55047 }
55048
55049 #[simd_test(enable = "avx512f")]
55050 unsafe fn test_mm512_unpackhi_epi32() {
55051 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55052 let b = _mm512_set_epi32(
55053 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55054 );
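        // Interleaves the upper two dwords of each 128-bit lane: a[2], b[2], a[3], b[3].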
55055 let r = _mm512_unpackhi_epi32(a, b);
55056 let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
55057 assert_eq_m512i(r, e);
55058 }
55059
55060 #[simd_test(enable = "avx512f")]
55061 unsafe fn test_mm512_mask_unpackhi_epi32() {
55062 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55063 let b = _mm512_set_epi32(
55064 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55065 );
55066 let r = _mm512_mask_unpackhi_epi32(a, 0, a, b);
55067 assert_eq_m512i(r, a);
55068 let r = _mm512_mask_unpackhi_epi32(a, 0b11111111_11111111, a, b);
55069 let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
55070 assert_eq_m512i(r, e);
55071 }
55072
55073 #[simd_test(enable = "avx512f")]
55074 unsafe fn test_mm512_maskz_unpackhi_epi32() {
55075 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55076 let b = _mm512_set_epi32(
55077 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55078 );
55079 let r = _mm512_maskz_unpackhi_epi32(0, a, b);
55080 assert_eq_m512i(r, _mm512_setzero_si512());
55081 let r = _mm512_maskz_unpackhi_epi32(0b00000000_11111111, a, b);
55082 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 9, 26, 10, 29, 13, 30, 14);
55083 assert_eq_m512i(r, e);
55084 }
55085
55086 #[simd_test(enable = "avx512f,avx512vl")]
55087 unsafe fn test_mm256_mask_unpackhi_epi32() {
55088 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55089 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55090 let r = _mm256_mask_unpackhi_epi32(a, 0, a, b);
55091 assert_eq_m256i(r, a);
55092 let r = _mm256_mask_unpackhi_epi32(a, 0b11111111, a, b);
55093 let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
55094 assert_eq_m256i(r, e);
55095 }
55096
55097 #[simd_test(enable = "avx512f,avx512vl")]
55098 unsafe fn test_mm256_maskz_unpackhi_epi32() {
55099 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55100 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55101 let r = _mm256_maskz_unpackhi_epi32(0, a, b);
55102 assert_eq_m256i(r, _mm256_setzero_si256());
55103 let r = _mm256_maskz_unpackhi_epi32(0b11111111, a, b);
55104 let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
55105 assert_eq_m256i(r, e);
55106 }
55107
55108 #[simd_test(enable = "avx512f,avx512vl")]
55109 unsafe fn test_mm_mask_unpackhi_epi32() {
55110 let a = _mm_set_epi32(1, 2, 3, 4);
55111 let b = _mm_set_epi32(17, 18, 19, 20);
55112 let r = _mm_mask_unpackhi_epi32(a, 0, a, b);
55113 assert_eq_m128i(r, a);
55114 let r = _mm_mask_unpackhi_epi32(a, 0b00001111, a, b);
55115 let e = _mm_set_epi32(17, 1, 18, 2);
55116 assert_eq_m128i(r, e);
55117 }
55118
55119 #[simd_test(enable = "avx512f,avx512vl")]
55120 unsafe fn test_mm_maskz_unpackhi_epi32() {
55121 let a = _mm_set_epi32(1, 2, 3, 4);
55122 let b = _mm_set_epi32(17, 18, 19, 20);
55123 let r = _mm_maskz_unpackhi_epi32(0, a, b);
55124 assert_eq_m128i(r, _mm_setzero_si128());
55125 let r = _mm_maskz_unpackhi_epi32(0b00001111, a, b);
55126 let e = _mm_set_epi32(17, 1, 18, 2);
55127 assert_eq_m128i(r, e);
55128 }
55129
55130 #[simd_test(enable = "avx512f")]
55131 unsafe fn test_mm512_unpackhi_ps() {
55132 let a = _mm512_set_ps(
55133 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55134 );
55135 let b = _mm512_set_ps(
55136 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55137 );
55138 let r = _mm512_unpackhi_ps(a, b);
55139 let e = _mm512_set_ps(
55140 17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
55141 );
55142 assert_eq_m512(r, e);
55143 }
55144
55145 #[simd_test(enable = "avx512f")]
55146 unsafe fn test_mm512_mask_unpackhi_ps() {
55147 let a = _mm512_set_ps(
55148 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55149 );
55150 let b = _mm512_set_ps(
55151 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55152 );
55153 let r = _mm512_mask_unpackhi_ps(a, 0, a, b);
55154 assert_eq_m512(r, a);
55155 let r = _mm512_mask_unpackhi_ps(a, 0b11111111_11111111, a, b);
55156 let e = _mm512_set_ps(
55157 17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
55158 );
55159 assert_eq_m512(r, e);
55160 }
55161
55162 #[simd_test(enable = "avx512f")]
55163 unsafe fn test_mm512_maskz_unpackhi_ps() {
55164 let a = _mm512_set_ps(
55165 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55166 );
55167 let b = _mm512_set_ps(
55168 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55169 );
55170 let r = _mm512_maskz_unpackhi_ps(0, a, b);
55171 assert_eq_m512(r, _mm512_setzero_ps());
55172 let r = _mm512_maskz_unpackhi_ps(0b00000000_11111111, a, b);
55173 let e = _mm512_set_ps(
55174 0., 0., 0., 0., 0., 0., 0., 0., 25., 9., 26., 10., 29., 13., 30., 14.,
55175 );
55176 assert_eq_m512(r, e);
55177 }
55178
55179 #[simd_test(enable = "avx512f,avx512vl")]
55180 unsafe fn test_mm256_mask_unpackhi_ps() {
55181 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55182 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55183 let r = _mm256_mask_unpackhi_ps(a, 0, a, b);
55184 assert_eq_m256(r, a);
55185 let r = _mm256_mask_unpackhi_ps(a, 0b11111111, a, b);
55186 let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
55187 assert_eq_m256(r, e);
55188 }
55189
55190 #[simd_test(enable = "avx512f,avx512vl")]
55191 unsafe fn test_mm256_maskz_unpackhi_ps() {
55192 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55193 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55194 let r = _mm256_maskz_unpackhi_ps(0, a, b);
55195 assert_eq_m256(r, _mm256_setzero_ps());
55196 let r = _mm256_maskz_unpackhi_ps(0b11111111, a, b);
55197 let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
55198 assert_eq_m256(r, e);
55199 }
55200
55201 #[simd_test(enable = "avx512f,avx512vl")]
55202 unsafe fn test_mm_mask_unpackhi_ps() {
55203 let a = _mm_set_ps(1., 2., 3., 4.);
55204 let b = _mm_set_ps(17., 18., 19., 20.);
55205 let r = _mm_mask_unpackhi_ps(a, 0, a, b);
55206 assert_eq_m128(r, a);
55207 let r = _mm_mask_unpackhi_ps(a, 0b00001111, a, b);
55208 let e = _mm_set_ps(17., 1., 18., 2.);
55209 assert_eq_m128(r, e);
55210 }
55211
55212 #[simd_test(enable = "avx512f,avx512vl")]
55213 unsafe fn test_mm_maskz_unpackhi_ps() {
55214 let a = _mm_set_ps(1., 2., 3., 4.);
55215 let b = _mm_set_ps(17., 18., 19., 20.);
55216 let r = _mm_maskz_unpackhi_ps(0, a, b);
55217 assert_eq_m128(r, _mm_setzero_ps());
55218 let r = _mm_maskz_unpackhi_ps(0b00001111, a, b);
55219 let e = _mm_set_ps(17., 1., 18., 2.);
55220 assert_eq_m128(r, e);
55221 }
55222
55223 #[simd_test(enable = "avx512f")]
55224 unsafe fn test_mm512_unpacklo_epi32() {
55225 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55226 let b = _mm512_set_epi32(
55227 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55228 );
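        // Interleaves the lower two dwords of each 128-bit lane: a[0], b[0], a[1], b[1].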
55229 let r = _mm512_unpacklo_epi32(a, b);
55230 let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
55231 assert_eq_m512i(r, e);
55232 }
55233
55234 #[simd_test(enable = "avx512f")]
55235 unsafe fn test_mm512_mask_unpacklo_epi32() {
55236 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55237 let b = _mm512_set_epi32(
55238 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55239 );
55240 let r = _mm512_mask_unpacklo_epi32(a, 0, a, b);
55241 assert_eq_m512i(r, a);
55242 let r = _mm512_mask_unpacklo_epi32(a, 0b11111111_11111111, a, b);
55243 let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
55244 assert_eq_m512i(r, e);
55245 }
55246
55247 #[simd_test(enable = "avx512f")]
55248 unsafe fn test_mm512_maskz_unpacklo_epi32() {
55249 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55250 let b = _mm512_set_epi32(
55251 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55252 );
55253 let r = _mm512_maskz_unpacklo_epi32(0, a, b);
55254 assert_eq_m512i(r, _mm512_setzero_si512());
55255 let r = _mm512_maskz_unpacklo_epi32(0b00000000_11111111, a, b);
55256 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 27, 11, 28, 12, 31, 15, 32, 16);
55257 assert_eq_m512i(r, e);
55258 }
55259
55260 #[simd_test(enable = "avx512f,avx512vl")]
55261 unsafe fn test_mm256_mask_unpacklo_epi32() {
55262 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55263 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55264 let r = _mm256_mask_unpacklo_epi32(a, 0, a, b);
55265 assert_eq_m256i(r, a);
55266 let r = _mm256_mask_unpacklo_epi32(a, 0b11111111, a, b);
55267 let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
55268 assert_eq_m256i(r, e);
55269 }
55270
55271 #[simd_test(enable = "avx512f,avx512vl")]
55272 unsafe fn test_mm256_maskz_unpacklo_epi32() {
55273 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55274 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55275 let r = _mm256_maskz_unpacklo_epi32(0, a, b);
55276 assert_eq_m256i(r, _mm256_setzero_si256());
55277 let r = _mm256_maskz_unpacklo_epi32(0b11111111, a, b);
55278 let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
55279 assert_eq_m256i(r, e);
55280 }
55281
55282 #[simd_test(enable = "avx512f,avx512vl")]
55283 unsafe fn test_mm_mask_unpacklo_epi32() {
55284 let a = _mm_set_epi32(1, 2, 3, 4);
55285 let b = _mm_set_epi32(17, 18, 19, 20);
55286 let r = _mm_mask_unpacklo_epi32(a, 0, a, b);
55287 assert_eq_m128i(r, a);
55288 let r = _mm_mask_unpacklo_epi32(a, 0b00001111, a, b);
55289 let e = _mm_set_epi32(19, 3, 20, 4);
55290 assert_eq_m128i(r, e);
55291 }
55292
55293 #[simd_test(enable = "avx512f,avx512vl")]
55294 unsafe fn test_mm_maskz_unpacklo_epi32() {
55295 let a = _mm_set_epi32(1, 2, 3, 4);
55296 let b = _mm_set_epi32(17, 18, 19, 20);
55297 let r = _mm_maskz_unpacklo_epi32(0, a, b);
55298 assert_eq_m128i(r, _mm_setzero_si128());
55299 let r = _mm_maskz_unpacklo_epi32(0b00001111, a, b);
55300 let e = _mm_set_epi32(19, 3, 20, 4);
55301 assert_eq_m128i(r, e);
55302 }
55303
55304 #[simd_test(enable = "avx512f")]
55305 unsafe fn test_mm512_unpacklo_ps() {
55306 let a = _mm512_set_ps(
55307 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55308 );
55309 let b = _mm512_set_ps(
55310 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55311 );
55312 let r = _mm512_unpacklo_ps(a, b);
55313 let e = _mm512_set_ps(
55314 19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
55315 );
55316 assert_eq_m512(r, e);
55317 }
55318
55319 #[simd_test(enable = "avx512f")]
55320 unsafe fn test_mm512_mask_unpacklo_ps() {
55321 let a = _mm512_set_ps(
55322 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55323 );
55324 let b = _mm512_set_ps(
55325 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55326 );
55327 let r = _mm512_mask_unpacklo_ps(a, 0, a, b);
55328 assert_eq_m512(r, a);
55329 let r = _mm512_mask_unpacklo_ps(a, 0b11111111_11111111, a, b);
55330 let e = _mm512_set_ps(
55331 19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
55332 );
55333 assert_eq_m512(r, e);
55334 }
55335
55336 #[simd_test(enable = "avx512f")]
55337 unsafe fn test_mm512_maskz_unpacklo_ps() {
55338 let a = _mm512_set_ps(
55339 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55340 );
55341 let b = _mm512_set_ps(
55342 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55343 );
55344 let r = _mm512_maskz_unpacklo_ps(0, a, b);
55345 assert_eq_m512(r, _mm512_setzero_ps());
55346 let r = _mm512_maskz_unpacklo_ps(0b00000000_11111111, a, b);
55347 let e = _mm512_set_ps(
55348 0., 0., 0., 0., 0., 0., 0., 0., 27., 11., 28., 12., 31., 15., 32., 16.,
55349 );
55350 assert_eq_m512(r, e);
55351 }
55352
55353 #[simd_test(enable = "avx512f,avx512vl")]
55354 unsafe fn test_mm256_mask_unpacklo_ps() {
55355 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55356 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55357 let r = _mm256_mask_unpacklo_ps(a, 0, a, b);
55358 assert_eq_m256(r, a);
55359 let r = _mm256_mask_unpacklo_ps(a, 0b11111111, a, b);
55360 let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
55361 assert_eq_m256(r, e);
55362 }
55363
55364 #[simd_test(enable = "avx512f,avx512vl")]
55365 unsafe fn test_mm256_maskz_unpacklo_ps() {
55366 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55367 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55368 let r = _mm256_maskz_unpacklo_ps(0, a, b);
55369 assert_eq_m256(r, _mm256_setzero_ps());
55370 let r = _mm256_maskz_unpacklo_ps(0b11111111, a, b);
55371 let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
55372 assert_eq_m256(r, e);
55373 }
55374
55375 #[simd_test(enable = "avx512f,avx512vl")]
55376 unsafe fn test_mm_mask_unpacklo_ps() {
55377 let a = _mm_set_ps(1., 2., 3., 4.);
55378 let b = _mm_set_ps(17., 18., 19., 20.);
55379 let r = _mm_mask_unpacklo_ps(a, 0, a, b);
55380 assert_eq_m128(r, a);
55381 let r = _mm_mask_unpacklo_ps(a, 0b00001111, a, b);
55382 let e = _mm_set_ps(19., 3., 20., 4.);
55383 assert_eq_m128(r, e);
55384 }
55385
55386 #[simd_test(enable = "avx512f,avx512vl")]
55387 unsafe fn test_mm_maskz_unpacklo_ps() {
55388 let a = _mm_set_ps(1., 2., 3., 4.);
55389 let b = _mm_set_ps(17., 18., 19., 20.);
55390 let r = _mm_maskz_unpacklo_ps(0, a, b);
55391 assert_eq_m128(r, _mm_setzero_ps());
55392 let r = _mm_maskz_unpacklo_ps(0b00001111, a, b);
55393 let e = _mm_set_ps(19., 3., 20., 4.);
55394 assert_eq_m128(r, e);
55395 }
55396
55397 #[simd_test(enable = "avx512f")]
55398 unsafe fn test_mm512_alignr_epi32() {
55399 let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55400 let b = _mm512_set_epi32(
55401 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55402 );
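        // Concatenates `a` (upper half) and `b` (lower half) into 32 dwords, shifts
        // right by IMM8 dwords, and keeps the low 16 dwords.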
55403 let r = _mm512_alignr_epi32::<0>(a, b);
55404 assert_eq_m512i(r, b);
55405 let r = _mm512_alignr_epi32::<16>(a, b);
55406 assert_eq_m512i(r, b);
55407 let r = _mm512_alignr_epi32::<1>(a, b);
55408 let e = _mm512_set_epi32(
55409 1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
55410 );
55411 assert_eq_m512i(r, e);
55412 }
55413
55414 #[simd_test(enable = "avx512f")]
55415 unsafe fn test_mm512_mask_alignr_epi32() {
55416 let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55417 let b = _mm512_set_epi32(
55418 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55419 );
55420 let r = _mm512_mask_alignr_epi32::<1>(a, 0, a, b);
55421 assert_eq_m512i(r, a);
55422 let r = _mm512_mask_alignr_epi32::<1>(a, 0b11111111_11111111, a, b);
55423 let e = _mm512_set_epi32(
55424 1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
55425 );
55426 assert_eq_m512i(r, e);
55427 }
55428
55429 #[simd_test(enable = "avx512f")]
55430 unsafe fn test_mm512_maskz_alignr_epi32() {
55431 let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55432 let b = _mm512_set_epi32(
55433 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55434 );
55435 let r = _mm512_maskz_alignr_epi32::<1>(0, a, b);
55436 assert_eq_m512i(r, _mm512_setzero_si512());
55437 let r = _mm512_maskz_alignr_epi32::<1>(0b00000000_11111111, a, b);
55438 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 24, 23, 22, 21, 20, 19, 18);
55439 assert_eq_m512i(r, e);
55440 }
55441
55442 #[simd_test(enable = "avx512f,avx512vl")]
55443 unsafe fn test_mm256_alignr_epi32() {
55444 let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55445 let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55446 let r = _mm256_alignr_epi32::<0>(a, b);
55447 assert_eq_m256i(r, b);
55448 let r = _mm256_alignr_epi32::<1>(a, b);
55449 let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55450 assert_eq_m256i(r, e);
55451 }
55452
55453 #[simd_test(enable = "avx512f,avx512vl")]
55454 unsafe fn test_mm256_mask_alignr_epi32() {
55455 let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55456 let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55457 let r = _mm256_mask_alignr_epi32::<1>(a, 0, a, b);
55458 assert_eq_m256i(r, a);
55459 let r = _mm256_mask_alignr_epi32::<1>(a, 0b11111111, a, b);
55460 let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55461 assert_eq_m256i(r, e);
55462 }
55463
55464 #[simd_test(enable = "avx512f,avx512vl")]
55465 unsafe fn test_mm256_maskz_alignr_epi32() {
55466 let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55467 let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55468 let r = _mm256_maskz_alignr_epi32::<1>(0, a, b);
55469 assert_eq_m256i(r, _mm256_setzero_si256());
55470 let r = _mm256_maskz_alignr_epi32::<1>(0b11111111, a, b);
55471 let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55472 assert_eq_m256i(r, e);
55473 }
55474
55475 #[simd_test(enable = "avx512f,avx512vl")]
55476 unsafe fn test_mm_alignr_epi32() {
55477 let a = _mm_set_epi32(4, 3, 2, 1);
55478 let b = _mm_set_epi32(8, 7, 6, 5);
55479 let r = _mm_alignr_epi32::<0>(a, b);
55480 assert_eq_m128i(r, b);
55481 let r = _mm_alignr_epi32::<1>(a, b);
55482 let e = _mm_set_epi32(1, 8, 7, 6);
55483 assert_eq_m128i(r, e);
55484 }
55485
55486 #[simd_test(enable = "avx512f,avx512vl")]
55487 unsafe fn test_mm_mask_alignr_epi32() {
55488 let a = _mm_set_epi32(4, 3, 2, 1);
55489 let b = _mm_set_epi32(8, 7, 6, 5);
55490 let r = _mm_mask_alignr_epi32::<1>(a, 0, a, b);
55491 assert_eq_m128i(r, a);
55492 let r = _mm_mask_alignr_epi32::<1>(a, 0b00001111, a, b);
55493 let e = _mm_set_epi32(1, 8, 7, 6);
55494 assert_eq_m128i(r, e);
55495 }
55496
55497 #[simd_test(enable = "avx512f,avx512vl")]
55498 unsafe fn test_mm_maskz_alignr_epi32() {
55499 let a = _mm_set_epi32(4, 3, 2, 1);
55500 let b = _mm_set_epi32(8, 7, 6, 5);
55501 let r = _mm_maskz_alignr_epi32::<1>(0, a, b);
55502 assert_eq_m128i(r, _mm_setzero_si128());
55503 let r = _mm_maskz_alignr_epi32::<1>(0b00001111, a, b);
55504 let e = _mm_set_epi32(1, 8, 7, 6);
55505 assert_eq_m128i(r, e);
55506 }
55507
55508 #[simd_test(enable = "avx512f")]
55509 unsafe fn test_mm512_and_epi32() {
55510 #[rustfmt::skip]
55511 let a = _mm512_set_epi32(
55512 1 << 1 | 1 << 2, 0, 0, 0,
55513 0, 0, 0, 0,
55514 0, 0, 0, 0,
55515 0, 0, 0, 1 << 1 | 1 << 3,
55516 );
55517 #[rustfmt::skip]
55518 let b = _mm512_set_epi32(
55519 1 << 1, 0, 0, 0,
55520 0, 0, 0, 0,
55521 0, 0, 0, 0,
55522 0, 0, 0, 1 << 3 | 1 << 4,
55523 );
55524 let r = _mm512_and_epi32(a, b);
55525 let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55526 assert_eq_m512i(r, e);
55527 }
55528
55529 #[simd_test(enable = "avx512f")]
55530 unsafe fn test_mm512_mask_and_epi32() {
55531 #[rustfmt::skip]
55532 let a = _mm512_set_epi32(
55533 1 << 1 | 1 << 2, 0, 0, 0,
55534 0, 0, 0, 0,
55535 0, 0, 0, 0,
55536 0, 0, 0, 1 << 1 | 1 << 3,
55537 );
55538 #[rustfmt::skip]
55539 let b = _mm512_set_epi32(
55540 1 << 1, 0, 0, 0,
55541 0, 0, 0, 0,
55542 0, 0, 0, 0,
55543 0, 0, 0, 1 << 3 | 1 << 4,
55544 );
55545 let r = _mm512_mask_and_epi32(a, 0, a, b);
55546 assert_eq_m512i(r, a);
55547 let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
55548 #[rustfmt::skip]
55549 let e = _mm512_set_epi32(
55550 1 << 1 | 1 << 2, 0, 0, 0,
55551 0, 0, 0, 0,
55552 0, 0, 0, 0,
55553 0, 0, 0, 1 << 3,
55554 );
55555 assert_eq_m512i(r, e);
55556 }
55557
55558 #[simd_test(enable = "avx512f")]
55559 unsafe fn test_mm512_maskz_and_epi32() {
55560 #[rustfmt::skip]
55561 let a = _mm512_set_epi32(
55562 1 << 1 | 1 << 2, 0, 0, 0,
55563 0, 0, 0, 0,
55564 0, 0, 0, 0,
55565 0, 0, 0, 1 << 1 | 1 << 3,
55566 );
55567 #[rustfmt::skip]
55568 let b = _mm512_set_epi32(
55569 1 << 1, 0, 0, 0,
55570 0, 0, 0, 0,
55571 0, 0, 0, 0,
55572 0, 0, 0, 1 << 3 | 1 << 4,
55573 );
55574 let r = _mm512_maskz_and_epi32(0, a, b);
55575 assert_eq_m512i(r, _mm512_setzero_si512());
55576 let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
55577 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55578 assert_eq_m512i(r, e);
55579 }
55580
55581 #[simd_test(enable = "avx512f,avx512vl")]
55582 unsafe fn test_mm256_mask_and_epi32() {
55583 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55584 let b = _mm256_set1_epi32(1 << 1);
55585 let r = _mm256_mask_and_epi32(a, 0, a, b);
55586 assert_eq_m256i(r, a);
55587 let r = _mm256_mask_and_epi32(a, 0b11111111, a, b);
55588 let e = _mm256_set1_epi32(1 << 1);
55589 assert_eq_m256i(r, e);
55590 }
55591
55592 #[simd_test(enable = "avx512f,avx512vl")]
55593 unsafe fn test_mm256_maskz_and_epi32() {
55594 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55595 let b = _mm256_set1_epi32(1 << 1);
55596 let r = _mm256_maskz_and_epi32(0, a, b);
55597 assert_eq_m256i(r, _mm256_setzero_si256());
55598 let r = _mm256_maskz_and_epi32(0b11111111, a, b);
55599 let e = _mm256_set1_epi32(1 << 1);
55600 assert_eq_m256i(r, e);
55601 }
55602
55603 #[simd_test(enable = "avx512f,avx512vl")]
55604 unsafe fn test_mm_mask_and_epi32() {
55605 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55606 let b = _mm_set1_epi32(1 << 1);
55607 let r = _mm_mask_and_epi32(a, 0, a, b);
55608 assert_eq_m128i(r, a);
55609 let r = _mm_mask_and_epi32(a, 0b00001111, a, b);
55610 let e = _mm_set1_epi32(1 << 1);
55611 assert_eq_m128i(r, e);
55612 }
55613
55614 #[simd_test(enable = "avx512f,avx512vl")]
55615 unsafe fn test_mm_maskz_and_epi32() {
55616 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55617 let b = _mm_set1_epi32(1 << 1);
55618 let r = _mm_maskz_and_epi32(0, a, b);
55619 assert_eq_m128i(r, _mm_setzero_si128());
55620 let r = _mm_maskz_and_epi32(0b00001111, a, b);
55621 let e = _mm_set1_epi32(1 << 1);
55622 assert_eq_m128i(r, e);
55623 }
55624
55625 #[simd_test(enable = "avx512f")]
55626 unsafe fn test_mm512_and_si512() {
55627 #[rustfmt::skip]
55628 let a = _mm512_set_epi32(
55629 1 << 1 | 1 << 2, 0, 0, 0,
55630 0, 0, 0, 0,
55631 0, 0, 0, 0,
55632 0, 0, 0, 1 << 1 | 1 << 3,
55633 );
55634 #[rustfmt::skip]
55635 let b = _mm512_set_epi32(
55636 1 << 1, 0, 0, 0,
55637 0, 0, 0, 0,
55638 0, 0, 0, 0,
55639 0, 0, 0, 1 << 3 | 1 << 4,
55640 );
55641 let r = _mm512_and_si512(a, b);
55642 let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55643 assert_eq_m512i(r, e);
55644 }
55645
55646 #[simd_test(enable = "avx512f")]
55647 unsafe fn test_mm512_or_epi32() {
55648 #[rustfmt::skip]
55649 let a = _mm512_set_epi32(
55650 1 << 1 | 1 << 2, 0, 0, 0,
55651 0, 0, 0, 0,
55652 0, 0, 0, 0,
55653 0, 0, 0, 1 << 1 | 1 << 3,
55654 );
55655 #[rustfmt::skip]
55656 let b = _mm512_set_epi32(
55657 1 << 1, 0, 0, 0,
55658 0, 0, 0, 0,
55659 0, 0, 0, 0,
55660 0, 0, 0, 1 << 3 | 1 << 4,
55661 );
55662 let r = _mm512_or_epi32(a, b);
55663 #[rustfmt::skip]
55664 let e = _mm512_set_epi32(
55665 1 << 1 | 1 << 2, 0, 0, 0,
55666 0, 0, 0, 0,
55667 0, 0, 0, 0,
55668 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55669 );
55670 assert_eq_m512i(r, e);
55671 }
55672
55673 #[simd_test(enable = "avx512f")]
55674 unsafe fn test_mm512_mask_or_epi32() {
55675 #[rustfmt::skip]
55676 let a = _mm512_set_epi32(
55677 1 << 1 | 1 << 2, 0, 0, 0,
55678 0, 0, 0, 0,
55679 0, 0, 0, 0,
55680 0, 0, 0, 1 << 1 | 1 << 3,
55681 );
55682 #[rustfmt::skip]
55683 let b = _mm512_set_epi32(
55684 1 << 1, 0, 0, 0,
55685 0, 0, 0, 0,
55686 0, 0, 0, 0,
55687 0, 0, 0, 1 << 3 | 1 << 4,
55688 );
55689 let r = _mm512_mask_or_epi32(a, 0, a, b);
55690 assert_eq_m512i(r, a);
55691 let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
55692 #[rustfmt::skip]
55693 let e = _mm512_set_epi32(
55694 1 << 1 | 1 << 2, 0, 0, 0,
55695 0, 0, 0, 0,
55696 0, 0, 0, 0,
55697 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55698 );
55699 assert_eq_m512i(r, e);
55700 }
55701
55702 #[simd_test(enable = "avx512f")]
55703 unsafe fn test_mm512_maskz_or_epi32() {
55704 #[rustfmt::skip]
55705 let a = _mm512_set_epi32(
55706 1 << 1 | 1 << 2, 0, 0, 0,
55707 0, 0, 0, 0,
55708 0, 0, 0, 0,
55709 0, 0, 0, 1 << 1 | 1 << 3,
55710 );
55711 #[rustfmt::skip]
55712 let b = _mm512_set_epi32(
55713 1 << 1, 0, 0, 0,
55714 0, 0, 0, 0,
55715 0, 0, 0, 0,
55716 0, 0, 0, 1 << 3 | 1 << 4,
55717 );
55718 let r = _mm512_maskz_or_epi32(0, a, b);
55719 assert_eq_m512i(r, _mm512_setzero_si512());
55720 let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
55721 #[rustfmt::skip]
55722 let e = _mm512_set_epi32(
55723 0, 0, 0, 0,
55724 0, 0, 0, 0,
55725 0, 0, 0, 0,
55726 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55727 );
55728 assert_eq_m512i(r, e);
55729 }
55730
55731 #[simd_test(enable = "avx512f,avx512vl")]
55732 unsafe fn test_mm256_or_epi32() {
55733 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55734 let b = _mm256_set1_epi32(1 << 1);
55735 let r = _mm256_or_epi32(a, b);
55736 let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55737 assert_eq_m256i(r, e);
55738 }
55739
55740 #[simd_test(enable = "avx512f,avx512vl")]
55741 unsafe fn test_mm256_mask_or_epi32() {
55742 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55743 let b = _mm256_set1_epi32(1 << 1);
55744 let r = _mm256_mask_or_epi32(a, 0, a, b);
55745 assert_eq_m256i(r, a);
55746 let r = _mm256_mask_or_epi32(a, 0b11111111, a, b);
55747 let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55748 assert_eq_m256i(r, e);
55749 }
55750
55751 #[simd_test(enable = "avx512f,avx512vl")]
55752 unsafe fn test_mm256_maskz_or_epi32() {
55753 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55754 let b = _mm256_set1_epi32(1 << 1);
55755 let r = _mm256_maskz_or_epi32(0, a, b);
55756 assert_eq_m256i(r, _mm256_setzero_si256());
55757 let r = _mm256_maskz_or_epi32(0b11111111, a, b);
55758 let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55759 assert_eq_m256i(r, e);
55760 }
55761
55762 #[simd_test(enable = "avx512f,avx512vl")]
55763 unsafe fn test_mm_or_epi32() {
55764 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55765 let b = _mm_set1_epi32(1 << 1);
55766 let r = _mm_or_epi32(a, b);
55767 let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55768 assert_eq_m128i(r, e);
55769 }
55770
55771 #[simd_test(enable = "avx512f,avx512vl")]
55772 unsafe fn test_mm_mask_or_epi32() {
55773 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55774 let b = _mm_set1_epi32(1 << 1);
55775 let r = _mm_mask_or_epi32(a, 0, a, b);
55776 assert_eq_m128i(r, a);
55777 let r = _mm_mask_or_epi32(a, 0b00001111, a, b);
55778 let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55779 assert_eq_m128i(r, e);
55780 }
55781
55782 #[simd_test(enable = "avx512f,avx512vl")]
55783 unsafe fn test_mm_maskz_or_epi32() {
55784 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55785 let b = _mm_set1_epi32(1 << 1);
55786 let r = _mm_maskz_or_epi32(0, a, b);
55787 assert_eq_m128i(r, _mm_setzero_si128());
55788 let r = _mm_maskz_or_epi32(0b00001111, a, b);
55789 let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55790 assert_eq_m128i(r, e);
55791 }
55792
55793 #[simd_test(enable = "avx512f")]
55794 unsafe fn test_mm512_or_si512() {
55795 #[rustfmt::skip]
55796 let a = _mm512_set_epi32(
55797 1 << 1 | 1 << 2, 0, 0, 0,
55798 0, 0, 0, 0,
55799 0, 0, 0, 0,
55800 0, 0, 0, 1 << 1 | 1 << 3,
55801 );
55802 #[rustfmt::skip]
55803 let b = _mm512_set_epi32(
55804 1 << 1, 0, 0, 0,
55805 0, 0, 0, 0,
55806 0, 0, 0, 0,
55807 0, 0, 0, 1 << 3 | 1 << 4,
55808 );
55809 let r = _mm512_or_si512(a, b);
55810 #[rustfmt::skip]
55811 let e = _mm512_set_epi32(
55812 1 << 1 | 1 << 2, 0, 0, 0,
55813 0, 0, 0, 0,
55814 0, 0, 0, 0,
55815 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55816 );
55817 assert_eq_m512i(r, e);
55818 }
55819
55820 #[simd_test(enable = "avx512f")]
55821 unsafe fn test_mm512_xor_epi32() {
55822 #[rustfmt::skip]
55823 let a = _mm512_set_epi32(
55824 1 << 1 | 1 << 2, 0, 0, 0,
55825 0, 0, 0, 0,
55826 0, 0, 0, 0,
55827 0, 0, 0, 1 << 1 | 1 << 3,
55828 );
55829 #[rustfmt::skip]
55830 let b = _mm512_set_epi32(
55831 1 << 1, 0, 0, 0,
55832 0, 0, 0, 0,
55833 0, 0, 0, 0,
55834 0, 0, 0, 1 << 3 | 1 << 4,
55835 );
55836 let r = _mm512_xor_epi32(a, b);
55837 #[rustfmt::skip]
55838 let e = _mm512_set_epi32(
55839 1 << 2, 0, 0, 0,
55840 0, 0, 0, 0,
55841 0, 0, 0, 0,
55842 0, 0, 0, 1 << 1 | 1 << 4,
55843 );
55844 assert_eq_m512i(r, e);
55845 }
55846
55847 #[simd_test(enable = "avx512f")]
55848 unsafe fn test_mm512_mask_xor_epi32() {
55849 #[rustfmt::skip]
55850 let a = _mm512_set_epi32(
55851 1 << 1 | 1 << 2, 0, 0, 0,
55852 0, 0, 0, 0,
55853 0, 0, 0, 0,
55854 0, 0, 0, 1 << 1 | 1 << 3,
55855 );
55856 #[rustfmt::skip]
55857 let b = _mm512_set_epi32(
55858 1 << 1, 0, 0, 0,
55859 0, 0, 0, 0,
55860 0, 0, 0, 0,
55861 0, 0, 0, 1 << 3 | 1 << 4,
55862 );
55863 let r = _mm512_mask_xor_epi32(a, 0, a, b);
55864 assert_eq_m512i(r, a);
55865 let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
55866 #[rustfmt::skip]
55867 let e = _mm512_set_epi32(
55868 1 << 1 | 1 << 2, 0, 0, 0,
55869 0, 0, 0, 0,
55870 0, 0, 0, 0,
55871 0, 0, 0, 1 << 1 | 1 << 4,
55872 );
55873 assert_eq_m512i(r, e);
55874 }
55875
55876 #[simd_test(enable = "avx512f")]
55877 unsafe fn test_mm512_maskz_xor_epi32() {
55878 #[rustfmt::skip]
55879 let a = _mm512_set_epi32(
55880 1 << 1 | 1 << 2, 0, 0, 0,
55881 0, 0, 0, 0,
55882 0, 0, 0, 0,
55883 0, 0, 0, 1 << 1 | 1 << 3,
55884 );
55885 #[rustfmt::skip]
55886 let b = _mm512_set_epi32(
55887 1 << 1, 0, 0, 0,
55888 0, 0, 0, 0,
55889 0, 0, 0, 0,
55890 0, 0, 0, 1 << 3 | 1 << 4,
55891 );
55892 let r = _mm512_maskz_xor_epi32(0, a, b);
55893 assert_eq_m512i(r, _mm512_setzero_si512());
55894 let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
55895 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
55896 assert_eq_m512i(r, e);
55897 }
55898
55899 #[simd_test(enable = "avx512f,avx512vl")]
55900 unsafe fn test_mm256_xor_epi32() {
55901 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55902 let b = _mm256_set1_epi32(1 << 1);
55903 let r = _mm256_xor_epi32(a, b);
55904 let e = _mm256_set1_epi32(1 << 2);
55905 assert_eq_m256i(r, e);
55906 }
55907
55908 #[simd_test(enable = "avx512f,avx512vl")]
55909 unsafe fn test_mm256_mask_xor_epi32() {
55910 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55911 let b = _mm256_set1_epi32(1 << 1);
55912 let r = _mm256_mask_xor_epi32(a, 0, a, b);
55913 assert_eq_m256i(r, a);
55914 let r = _mm256_mask_xor_epi32(a, 0b11111111, a, b);
55915 let e = _mm256_set1_epi32(1 << 2);
55916 assert_eq_m256i(r, e);
55917 }
55918
55919 #[simd_test(enable = "avx512f,avx512vl")]
55920 unsafe fn test_mm256_maskz_xor_epi32() {
55921 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55922 let b = _mm256_set1_epi32(1 << 1);
55923 let r = _mm256_maskz_xor_epi32(0, a, b);
55924 assert_eq_m256i(r, _mm256_setzero_si256());
55925 let r = _mm256_maskz_xor_epi32(0b11111111, a, b);
55926 let e = _mm256_set1_epi32(1 << 2);
55927 assert_eq_m256i(r, e);
55928 }
55929
55930 #[simd_test(enable = "avx512f,avx512vl")]
55931 unsafe fn test_mm_xor_epi32() {
55932 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55933 let b = _mm_set1_epi32(1 << 1);
55934 let r = _mm_xor_epi32(a, b);
55935 let e = _mm_set1_epi32(1 << 2);
55936 assert_eq_m128i(r, e);
55937 }
55938
55939 #[simd_test(enable = "avx512f,avx512vl")]
55940 unsafe fn test_mm_mask_xor_epi32() {
55941 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55942 let b = _mm_set1_epi32(1 << 1);
55943 let r = _mm_mask_xor_epi32(a, 0, a, b);
55944 assert_eq_m128i(r, a);
55945 let r = _mm_mask_xor_epi32(a, 0b00001111, a, b);
55946 let e = _mm_set1_epi32(1 << 2);
55947 assert_eq_m128i(r, e);
55948 }
55949
55950 #[simd_test(enable = "avx512f,avx512vl")]
55951 unsafe fn test_mm_maskz_xor_epi32() {
55952 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55953 let b = _mm_set1_epi32(1 << 1);
55954 let r = _mm_maskz_xor_epi32(0, a, b);
55955 assert_eq_m128i(r, _mm_setzero_si128());
55956 let r = _mm_maskz_xor_epi32(0b00001111, a, b);
55957 let e = _mm_set1_epi32(1 << 2);
55958 assert_eq_m128i(r, e);
55959 }
55960
55961 #[simd_test(enable = "avx512f")]
55962 unsafe fn test_mm512_xor_si512() {
55963 #[rustfmt::skip]
55964 let a = _mm512_set_epi32(
55965 1 << 1 | 1 << 2, 0, 0, 0,
55966 0, 0, 0, 0,
55967 0, 0, 0, 0,
55968 0, 0, 0, 1 << 1 | 1 << 3,
55969 );
55970 #[rustfmt::skip]
55971 let b = _mm512_set_epi32(
55972 1 << 1, 0, 0, 0,
55973 0, 0, 0, 0,
55974 0, 0, 0, 0,
55975 0, 0, 0, 1 << 3 | 1 << 4,
55976 );
55977 let r = _mm512_xor_si512(a, b);
55978 #[rustfmt::skip]
55979 let e = _mm512_set_epi32(
55980 1 << 2, 0, 0, 0,
55981 0, 0, 0, 0,
55982 0, 0, 0, 0,
55983 0, 0, 0, 1 << 1 | 1 << 4,
55984 );
55985 assert_eq_m512i(r, e);
55986 }
55987
55988 #[simd_test(enable = "avx512f")]
55989 unsafe fn test_mm512_andnot_epi32() {
55990 let a = _mm512_set1_epi32(0);
55991 let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
55992 let r = _mm512_andnot_epi32(a, b);
55993 let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
55994 assert_eq_m512i(r, e);
55995 }
55996
55997 #[simd_test(enable = "avx512f")]
55998 unsafe fn test_mm512_mask_andnot_epi32() {
55999 let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
56000 let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
56001 let r = _mm512_mask_andnot_epi32(a, 0, a, b);
56002 assert_eq_m512i(r, a);
56003 let r = _mm512_mask_andnot_epi32(a, 0b11111111_11111111, a, b);
56004 let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
56005 assert_eq_m512i(r, e);
56006 }
56007
56008 #[simd_test(enable = "avx512f")]
56009 unsafe fn test_mm512_maskz_andnot_epi32() {
56010 let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
56011 let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
56012 let r = _mm512_maskz_andnot_epi32(0, a, b);
56013 assert_eq_m512i(r, _mm512_setzero_si512());
56014 let r = _mm512_maskz_andnot_epi32(0b00000000_11111111, a, b);
56015 #[rustfmt::skip]
56016 let e = _mm512_set_epi32(
56017 0, 0, 0, 0,
56018 0, 0, 0, 0,
56019 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
56020 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
56021 );
56022 assert_eq_m512i(r, e);
56023 }
56024
56025 #[simd_test(enable = "avx512f,avx512vl")]
56026 unsafe fn test_mm256_mask_andnot_epi32() {
56027 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
56028 let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
56029 let r = _mm256_mask_andnot_epi32(a, 0, a, b);
56030 assert_eq_m256i(r, a);
56031 let r = _mm256_mask_andnot_epi32(a, 0b11111111, a, b);
56032 let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
56033 assert_eq_m256i(r, e);
56034 }
56035
56036 #[simd_test(enable = "avx512f,avx512vl")]
56037 unsafe fn test_mm256_maskz_andnot_epi32() {
56038 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
56039 let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
56040 let r = _mm256_maskz_andnot_epi32(0, a, b);
56041 assert_eq_m256i(r, _mm256_setzero_si256());
56042 let r = _mm256_maskz_andnot_epi32(0b11111111, a, b);
56043 let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
56044 assert_eq_m256i(r, e);
56045 }
56046
56047 #[simd_test(enable = "avx512f,avx512vl")]
56048 unsafe fn test_mm_mask_andnot_epi32() {
56049 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
56050 let b = _mm_set1_epi32(1 << 3 | 1 << 4);
56051 let r = _mm_mask_andnot_epi32(a, 0, a, b);
56052 assert_eq_m128i(r, a);
56053 let r = _mm_mask_andnot_epi32(a, 0b00001111, a, b);
56054 let e = _mm_set1_epi32(1 << 3 | 1 << 4);
56055 assert_eq_m128i(r, e);
56056 }
56057
56058 #[simd_test(enable = "avx512f,avx512vl")]
56059 unsafe fn test_mm_maskz_andnot_epi32() {
56060 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
56061 let b = _mm_set1_epi32(1 << 3 | 1 << 4);
56062 let r = _mm_maskz_andnot_epi32(0, a, b);
56063 assert_eq_m128i(r, _mm_setzero_si128());
56064 let r = _mm_maskz_andnot_epi32(0b00001111, a, b);
56065 let e = _mm_set1_epi32(1 << 3 | 1 << 4);
56066 assert_eq_m128i(r, e);
56067 }
56068
56069 #[simd_test(enable = "avx512f")]
56070 unsafe fn test_cvtmask16_u32() {
56071 let a: __mmask16 = 0b11001100_00110011;
56072 let r = _cvtmask16_u32(a);
56073 let e: u32 = 0b11001100_00110011;
56074 assert_eq!(r, e);
56075 }
56076
56077 #[simd_test(enable = "avx512f")]
56078 unsafe fn test_cvtu32_mask16() {
56079 let a: u32 = 0b11001100_00110011;
56080 let r = _cvtu32_mask16(a);
56081 let e: __mmask16 = 0b11001100_00110011;
56082 assert_eq!(r, e);
56083 }
56084
56085 #[simd_test(enable = "avx512f")]
56086 unsafe fn test_mm512_kand() {
56087 let a: u16 = 0b11001100_00110011;
56088 let b: u16 = 0b11001100_00110011;
56089 let r = _mm512_kand(a, b);
56090 let e: u16 = 0b11001100_00110011;
56091 assert_eq!(r, e);
56092 }
56093
56094 #[simd_test(enable = "avx512f")]
56095 unsafe fn test_kand_mask16() {
56096 let a: u16 = 0b11001100_00110011;
56097 let b: u16 = 0b11001100_00110011;
56098 let r = _kand_mask16(a, b);
56099 let e: u16 = 0b11001100_00110011;
56100 assert_eq!(r, e);
56101 }
56102
56103 #[simd_test(enable = "avx512f")]
56104 unsafe fn test_mm512_kor() {
56105 let a: u16 = 0b11001100_00110011;
56106 let b: u16 = 0b00101110_00001011;
56107 let r = _mm512_kor(a, b);
56108 let e: u16 = 0b11101110_00111011;
56109 assert_eq!(r, e);
56110 }
56111
56112 #[simd_test(enable = "avx512f")]
56113 unsafe fn test_kor_mask16() {
56114 let a: u16 = 0b11001100_00110011;
56115 let b: u16 = 0b00101110_00001011;
56116 let r = _kor_mask16(a, b);
56117 let e: u16 = 0b11101110_00111011;
56118 assert_eq!(r, e);
56119 }
56120
56121 #[simd_test(enable = "avx512f")]
56122 unsafe fn test_mm512_kxor() {
56123 let a: u16 = 0b11001100_00110011;
56124 let b: u16 = 0b00101110_00001011;
56125 let r = _mm512_kxor(a, b);
56126 let e: u16 = 0b11100010_00111000;
56127 assert_eq!(r, e);
56128 }
56129
56130 #[simd_test(enable = "avx512f")]
56131 unsafe fn test_kxor_mask16() {
56132 let a: u16 = 0b11001100_00110011;
56133 let b: u16 = 0b00101110_00001011;
56134 let r = _kxor_mask16(a, b);
56135 let e: u16 = 0b11100010_00111000;
56136 assert_eq!(r, e);
56137 }
56138
56139 #[simd_test(enable = "avx512f")]
56140 unsafe fn test_mm512_knot() {
56141 let a: u16 = 0b11001100_00110011;
56142 let r = _mm512_knot(a);
56143 let e: u16 = 0b00110011_11001100;
56144 assert_eq!(r, e);
56145 }
56146
56147 #[simd_test(enable = "avx512f")]
56148 unsafe fn test_knot_mask16() {
56149 let a: u16 = 0b11001100_00110011;
56150 let r = _knot_mask16(a);
56151 let e: u16 = 0b00110011_11001100;
56152 assert_eq!(r, e);
56153 }
56154
56155 #[simd_test(enable = "avx512f")]
56156 unsafe fn test_mm512_kandn() {
56157 let a: u16 = 0b11001100_00110011;
56158 let b: u16 = 0b00101110_00001011;
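// kandn computes `!a & b` on the 16-bit masks.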
56159 let r = _mm512_kandn(a, b);
56160 let e: u16 = 0b00100010_00001000;
56161 assert_eq!(r, e);
56162 }
56163
56164 #[simd_test(enable = "avx512f")]
56165 unsafe fn test_kandn_mask16() {
56166 let a: u16 = 0b11001100_00110011;
56167 let b: u16 = 0b00101110_00001011;
56168 let r = _kandn_mask16(a, b);
56169 let e: u16 = 0b00100010_00001000;
56170 assert_eq!(r, e);
56171 }
56172
56173 #[simd_test(enable = "avx512f")]
56174 unsafe fn test_mm512_kxnor() {
56175 let a: u16 = 0b11001100_00110011;
56176 let b: u16 = 0b00101110_00001011;
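// kxnor computes `!(a ^ b)` on the 16-bit masks.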
56177 let r = _mm512_kxnor(a, b);
56178 let e: u16 = 0b00011101_11000111;
56179 assert_eq!(r, e);
56180 }
56181
56182 #[simd_test(enable = "avx512f")]
56183 unsafe fn test_kxnor_mask16() {
56184 let a: u16 = 0b11001100_00110011;
56185 let b: u16 = 0b00101110_00001011;
56186 let r = _kxnor_mask16(a, b);
56187 let e: u16 = 0b00011101_11000111;
56188 assert_eq!(r, e);
56189 }
56190
56191 #[simd_test(enable = "avx512dq")]
56192 unsafe fn test_kortest_mask16_u8() {
56193 let a: __mmask16 = 0b0110100101101001;
56194 let b: __mmask16 = 0b1011011010110110;
56195 let mut all_ones: u8 = 0;
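// `a | b` sets every bit, so the all-ones flag is written as 1 while the all-zeros result is 0.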
56196 let r = _kortest_mask16_u8(a, b, &mut all_ones);
56197 assert_eq!(r, 0);
56198 assert_eq!(all_ones, 1);
56199 }
56200
56201 #[simd_test(enable = "avx512dq")]
56202 unsafe fn test_kortestc_mask16_u8() {
56203 let a: __mmask16 = 0b0110100101101001;
56204 let b: __mmask16 = 0b1011011010110110;
56205 let r = _kortestc_mask16_u8(a, b);
56206 assert_eq!(r, 1);
56207 }
56208
56209 #[simd_test(enable = "avx512dq")]
56210 unsafe fn test_kortestz_mask16_u8() {
56211 let a: __mmask16 = 0b0110100101101001;
56212 let b: __mmask16 = 0b1011011010110110;
56213 let r = _kortestz_mask16_u8(a, b);
56214 assert_eq!(r, 0);
56215 }
56216
56217 #[simd_test(enable = "avx512dq")]
56218 unsafe fn test_kshiftli_mask16() {
56219 let a: __mmask16 = 0b1001011011000011;
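// The shift count is a const generic; bits shifted out of the 16-bit mask are discarded.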
56220 let r = _kshiftli_mask16::<3>(a);
56221 let e: __mmask16 = 0b1011011000011000;
56222 assert_eq!(r, e);
56223 }
56224
56225 #[simd_test(enable = "avx512dq")]
56226 unsafe fn test_kshiftri_mask16() {
56227 let a: __mmask16 = 0b0110100100111100;
56228 let r = _kshiftri_mask16::<3>(a);
56229 let e: __mmask16 = 0b0000110100100111;
56230 assert_eq!(r, e);
56231 }
56232
56233 #[simd_test(enable = "avx512f")]
56234 unsafe fn test_load_mask16() {
56235 let a: __mmask16 = 0b1001011011000011;
56236 let r = _load_mask16(&a);
56237 let e: __mmask16 = 0b1001011011000011;
56238 assert_eq!(r, e);
56239 }
56240
56241 #[simd_test(enable = "avx512f")]
56242 unsafe fn test_store_mask16() {
56243 let a: __mmask16 = 0b0110100100111100;
56244 let mut r = 0;
56245 _store_mask16(&mut r, a);
56246 let e: __mmask16 = 0b0110100100111100;
56247 assert_eq!(r, e);
56248 }
56249
56250 #[simd_test(enable = "avx512f")]
56251 unsafe fn test_mm512_kmov() {
56252 let a: u16 = 0b11001100_00110011;
56253 let r = _mm512_kmov(a);
56254 let e: u16 = 0b11001100_00110011;
56255 assert_eq!(r, e);
56256 }
56257
56258 #[simd_test(enable = "avx512f")]
56259 unsafe fn test_mm512_int2mask() {
56260 let a: i32 = 0b11001100_00110011;
56261 let r = _mm512_int2mask(a);
56262 let e: u16 = 0b11001100_00110011;
56263 assert_eq!(r, e);
56264 }
56265
56266 #[simd_test(enable = "avx512f")]
56267 unsafe fn test_mm512_mask2int() {
56268 let a: __mmask16 = 0b11001100_00110011;
56269 let r = _mm512_mask2int(a);
56270 let e: i32 = 0b11001100_00110011;
56271 assert_eq!(r, e);
56272 }
56273
56274 #[simd_test(enable = "avx512f")]
56275 unsafe fn test_mm512_kunpackb() {
56276 let a: u16 = 0b11001100_00110011;
56277 let b: u16 = 0b00101110_00001011;
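// kunpackb concatenates the low byte of `a` (upper half) with the low byte of `b` (lower half).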
56278 let r = _mm512_kunpackb(a, b);
56279 let e: u16 = 0b00110011_00001011;
56280 assert_eq!(r, e);
56281 }
56282
56283 #[simd_test(enable = "avx512f")]
56284 unsafe fn test_mm512_kortestc() {
56285 let a: u16 = 0b11001100_00110011;
56286 let b: u16 = 0b00101110_00001011;
56287 let r = _mm512_kortestc(a, b);
56288 assert_eq!(r, 0);
56289 let b: u16 = 0b11111111_11111111;
56290 let r = _mm512_kortestc(a, b);
56291 assert_eq!(r, 1);
56292 }
56293
56294 #[simd_test(enable = "avx512f")]
56295 unsafe fn test_mm512_kortestz() {
56296 let a: u16 = 0b11001100_00110011;
56297 let b: u16 = 0b00101110_00001011;
56298 let r = _mm512_kortestz(a, b);
56299 assert_eq!(r, 0);
56300 let r = _mm512_kortestz(0, 0);
56301 assert_eq!(r, 1);
56302 }
56303
56304 #[simd_test(enable = "avx512f")]
56305 unsafe fn test_mm512_test_epi32_mask() {
56306 let a = _mm512_set1_epi32(1 << 0);
56307 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
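// Each mask bit is set when `a & b` is non-zero in the corresponding lane.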
56308 let r = _mm512_test_epi32_mask(a, b);
56309 let e: __mmask16 = 0b11111111_11111111;
56310 assert_eq!(r, e);
56311 }
56312
56313 #[simd_test(enable = "avx512f")]
56314 unsafe fn test_mm512_mask_test_epi32_mask() {
56315 let a = _mm512_set1_epi32(1 << 0);
56316 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
56317 let r = _mm512_mask_test_epi32_mask(0, a, b);
56318 assert_eq!(r, 0);
56319 let r = _mm512_mask_test_epi32_mask(0b11111111_11111111, a, b);
56320 let e: __mmask16 = 0b11111111_11111111;
56321 assert_eq!(r, e);
56322 }
56323
56324 #[simd_test(enable = "avx512f,avx512vl")]
56325 unsafe fn test_mm256_test_epi32_mask() {
56326 let a = _mm256_set1_epi32(1 << 0);
56327 let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
56328 let r = _mm256_test_epi32_mask(a, b);
56329 let e: __mmask8 = 0b11111111;
56330 assert_eq!(r, e);
56331 }
56332
56333 #[simd_test(enable = "avx512f,avx512vl")]
56334 unsafe fn test_mm256_mask_test_epi32_mask() {
56335 let a = _mm256_set1_epi32(1 << 0);
56336 let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
56337 let r = _mm256_mask_test_epi32_mask(0, a, b);
56338 assert_eq!(r, 0);
56339 let r = _mm256_mask_test_epi32_mask(0b11111111, a, b);
56340 let e: __mmask8 = 0b11111111;
56341 assert_eq!(r, e);
56342 }
56343
56344 #[simd_test(enable = "avx512f,avx512vl")]
56345 unsafe fn test_mm_test_epi32_mask() {
56346 let a = _mm_set1_epi32(1 << 0);
56347 let b = _mm_set1_epi32(1 << 0 | 1 << 1);
56348 let r = _mm_test_epi32_mask(a, b);
56349 let e: __mmask8 = 0b00001111;
56350 assert_eq!(r, e);
56351 }
56352
56353 #[simd_test(enable = "avx512f,avx512vl")]
56354 unsafe fn test_mm_mask_test_epi32_mask() {
56355 let a = _mm_set1_epi32(1 << 0);
56356 let b = _mm_set1_epi32(1 << 0 | 1 << 1);
56357 let r = _mm_mask_test_epi32_mask(0, a, b);
56358 assert_eq!(r, 0);
56359 let r = _mm_mask_test_epi32_mask(0b11111111, a, b);
56360 let e: __mmask8 = 0b00001111;
56361 assert_eq!(r, e);
56362 }
56363
56364 #[simd_test(enable = "avx512f")]
56365 unsafe fn test_mm512_testn_epi32_mask() {
56366 let a = _mm512_set1_epi32(1 << 0);
56367 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
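// testn sets a mask bit only when `a & b` is zero in that lane, so the result here is all zeros.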
56368 let r = _mm512_testn_epi32_mask(a, b);
56369 let e: __mmask16 = 0b00000000_00000000;
56370 assert_eq!(r, e);
56371 }
56372
56373 #[simd_test(enable = "avx512f")]
56374 unsafe fn test_mm512_mask_testn_epi32_mask() {
56375 let a = _mm512_set1_epi32(1 << 0);
56376 let b = _mm512_set1_epi32(1 << 1);
56377 let r = _mm512_mask_testn_epi32_mask(0, a, b);
56378 assert_eq!(r, 0);
56379 let r = _mm512_mask_testn_epi32_mask(0b11111111_11111111, a, b);
56380 let e: __mmask16 = 0b11111111_11111111;
56381 assert_eq!(r, e);
56382 }
56383
56384 #[simd_test(enable = "avx512f,avx512vl")]
56385 unsafe fn test_mm256_testn_epi32_mask() {
56386 let a = _mm256_set1_epi32(1 << 0);
56387 let b = _mm256_set1_epi32(1 << 1);
56388 let r = _mm256_testn_epi32_mask(a, b);
56389 let e: __mmask8 = 0b11111111;
56390 assert_eq!(r, e);
56391 }
56392
56393 #[simd_test(enable = "avx512f,avx512vl")]
56394 unsafe fn test_mm256_mask_testn_epi32_mask() {
56395 let a = _mm256_set1_epi32(1 << 0);
56396 let b = _mm256_set1_epi32(1 << 1);
56397 let r = _mm256_mask_testn_epi32_mask(0, a, b);
56398 assert_eq!(r, 0);
56399 let r = _mm256_mask_testn_epi32_mask(0b11111111, a, b);
56400 let e: __mmask8 = 0b11111111;
56401 assert_eq!(r, e);
56402 }
56403
56404 #[simd_test(enable = "avx512f,avx512vl")]
56405 unsafe fn test_mm_testn_epi32_mask() {
56406 let a = _mm_set1_epi32(1 << 0);
56407 let b = _mm_set1_epi32(1 << 1);
56408 let r = _mm_testn_epi32_mask(a, b);
56409 let e: __mmask8 = 0b00001111;
56410 assert_eq!(r, e);
56411 }
56412
56413 #[simd_test(enable = "avx512f,avx512vl")]
56414 unsafe fn test_mm_mask_testn_epi32_mask() {
56415 let a = _mm_set1_epi32(1 << 0);
56416 let b = _mm_set1_epi32(1 << 1);
56417 let r = _mm_mask_testn_epi32_mask(0, a, b);
56418 assert_eq!(r, 0);
56419 let r = _mm_mask_testn_epi32_mask(0b11111111, a, b);
56420 let e: __mmask8 = 0b00001111;
56421 assert_eq!(r, e);
56422 }
56423
56424 #[simd_test(enable = "avx512f")]
56425 #[cfg_attr(miri, ignore)]
56426 unsafe fn test_mm512_stream_ps() {
56427 #[repr(align(64))]
56428 struct Memory {
56429 pub data: [f32; 16], // 64 bytes
56430 }
56431 let a = _mm512_set1_ps(7.0);
56432 let mut mem = Memory { data: [-1.0; 16] };
56433
56434 _mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
56435 for i in 0..16 {
56436 assert_eq!(mem.data[i], get_m512(a, i));
56437 }
56438 }
56439
56440 #[simd_test(enable = "avx512f")]
56441 #[cfg_attr(miri, ignore)]
56442 unsafe fn test_mm512_stream_pd() {
56443 #[repr(align(64))]
56444 struct Memory {
56445 pub data: [f64; 8],
56446 }
56447 let a = _mm512_set1_pd(7.0);
56448 let mut mem = Memory { data: [-1.0; 8] };
56449
56450 _mm512_stream_pd(&mut mem.data[0] as *mut f64, a);
56451 for i in 0..8 {
56452 assert_eq!(mem.data[i], get_m512d(a, i));
56453 }
56454 }
56455
56456 #[simd_test(enable = "avx512f")]
56457 #[cfg_attr(miri, ignore)]
56458 unsafe fn test_mm512_stream_si512() {
56459 #[repr(align(64))]
56460 struct Memory {
56461 pub data: [i64; 8],
56462 }
56463 let a = _mm512_set1_epi32(7);
56464 let mut mem = Memory { data: [-1; 8] };
56465
56466 _mm512_stream_si512(mem.data.as_mut_ptr().cast(), a);
56467 for i in 0..8 {
56468 assert_eq!(mem.data[i], get_m512i(a, i));
56469 }
56470 }
56471
56472 #[simd_test(enable = "avx512f")]
56473 unsafe fn test_mm512_stream_load_si512() {
56474 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
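// `__m512i` is 64-byte aligned, which satisfies the alignment requirement of the non-temporal load.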
56475 let r = _mm512_stream_load_si512(core::ptr::addr_of!(a) as *const _);
56476 assert_eq_m512i(a, r);
56477 }
56478
56479 #[simd_test(enable = "avx512f")]
56480 unsafe fn test_mm512_reduce_add_epi32() {
56481 let a = _mm512_set1_epi32(1);
56482 let e: i32 = _mm512_reduce_add_epi32(a);
56483 assert_eq!(16, e);
56484 }
56485
56486 #[simd_test(enable = "avx512f")]
56487 unsafe fn test_mm512_mask_reduce_add_epi32() {
56488 let a = _mm512_set1_epi32(1);
56489 let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a);
56490 assert_eq!(8, e);
56491 }
56492
56493 #[simd_test(enable = "avx512f")]
56494 unsafe fn test_mm512_reduce_add_ps() {
56495 let a = _mm512_set1_ps(1.);
56496 let e: f32 = _mm512_reduce_add_ps(a);
56497 assert_eq!(16., e);
56498 }
56499
56500 #[simd_test(enable = "avx512f")]
56501 unsafe fn test_mm512_mask_reduce_add_ps() {
56502 let a = _mm512_set1_ps(1.);
56503 let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a);
56504 assert_eq!(8., e);
56505 }
56506
56507 #[simd_test(enable = "avx512f")]
56508 unsafe fn test_mm512_reduce_mul_epi32() {
56509 let a = _mm512_set1_epi32(2);
56510 let e: i32 = _mm512_reduce_mul_epi32(a);
56511 assert_eq!(65536, e);
56512 }
56513
56514 #[simd_test(enable = "avx512f")]
56515 unsafe fn test_mm512_mask_reduce_mul_epi32() {
56516 let a = _mm512_set1_epi32(2);
56517 let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a);
56518 assert_eq!(256, e);
56519 }
56520
56521 #[simd_test(enable = "avx512f")]
56522 unsafe fn test_mm512_reduce_mul_ps() {
56523 let a = _mm512_set1_ps(2.);
56524 let e: f32 = _mm512_reduce_mul_ps(a);
56525 assert_eq!(65536., e);
56526 }
56527
56528 #[simd_test(enable = "avx512f")]
56529 unsafe fn test_mm512_mask_reduce_mul_ps() {
56530 let a = _mm512_set1_ps(2.);
56531 let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a);
56532 assert_eq!(256., e);
56533 }
56534
56535 #[simd_test(enable = "avx512f")]
56536 unsafe fn test_mm512_reduce_max_epi32() {
56537 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56538 let e: i32 = _mm512_reduce_max_epi32(a);
56539 assert_eq!(15, e);
56540 }
56541
56542 #[simd_test(enable = "avx512f")]
56543 unsafe fn test_mm512_mask_reduce_max_epi32() {
56544 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56545 let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a);
56546 assert_eq!(7, e);
56547 }
56548
56549 #[simd_test(enable = "avx512f")]
56550 unsafe fn test_mm512_reduce_max_epu32() {
56551 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56552 let e: u32 = _mm512_reduce_max_epu32(a);
56553 assert_eq!(15, e);
56554 }
56555
56556 #[simd_test(enable = "avx512f")]
56557 unsafe fn test_mm512_mask_reduce_max_epu32() {
56558 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56559 let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a);
56560 assert_eq!(7, e);
56561 }
56562
56563 #[simd_test(enable = "avx512f")]
56564 unsafe fn test_mm512_reduce_max_ps() {
56565 let a = _mm512_set_ps(
56566 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56567 );
56568 let e: f32 = _mm512_reduce_max_ps(a);
56569 assert_eq!(15., e);
56570 }
56571
56572 #[simd_test(enable = "avx512f")]
56573 unsafe fn test_mm512_mask_reduce_max_ps() {
56574 let a = _mm512_set_ps(
56575 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56576 );
56577 let e: f32 = _mm512_mask_reduce_max_ps(0b11111111_00000000, a);
56578 assert_eq!(7., e);
56579 }
56580
56581 #[simd_test(enable = "avx512f")]
56582 unsafe fn test_mm512_reduce_min_epi32() {
56583 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56584 let e: i32 = _mm512_reduce_min_epi32(a);
56585 assert_eq!(0, e);
56586 }
56587
56588 #[simd_test(enable = "avx512f")]
56589 unsafe fn test_mm512_mask_reduce_min_epi32() {
56590 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56591 let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a);
56592 assert_eq!(0, e);
56593 }
56594
56595 #[simd_test(enable = "avx512f")]
56596 unsafe fn test_mm512_reduce_min_epu32() {
56597 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56598 let e: u32 = _mm512_reduce_min_epu32(a);
56599 assert_eq!(0, e);
56600 }
56601
56602 #[simd_test(enable = "avx512f")]
56603 unsafe fn test_mm512_mask_reduce_min_epu32() {
56604 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56605 let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a);
56606 assert_eq!(0, e);
56607 }
56608
56609 #[simd_test(enable = "avx512f")]
56610 unsafe fn test_mm512_reduce_min_ps() {
56611 let a = _mm512_set_ps(
56612 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56613 );
56614 let e: f32 = _mm512_reduce_min_ps(a);
56615 assert_eq!(0., e);
56616 }
56617
56618 #[simd_test(enable = "avx512f")]
56619 unsafe fn test_mm512_mask_reduce_min_ps() {
56620 let a = _mm512_set_ps(
56621 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56622 );
56623 let e: f32 = _mm512_mask_reduce_min_ps(0b11111111_00000000, a);
56624 assert_eq!(0., e);
56625 }
56626
56627 #[simd_test(enable = "avx512f")]
56628 unsafe fn test_mm512_reduce_and_epi32() {
56629 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56630 let e: i32 = _mm512_reduce_and_epi32(a);
56631 assert_eq!(0, e);
56632 }
56633
56634 #[simd_test(enable = "avx512f")]
56635 unsafe fn test_mm512_mask_reduce_and_epi32() {
56636 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56637 let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
56638 assert_eq!(1, e);
56639 }
56640
56641 #[simd_test(enable = "avx512f")]
56642 unsafe fn test_mm512_reduce_or_epi32() {
56643 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56644 let e: i32 = _mm512_reduce_or_epi32(a);
56645 assert_eq!(3, e);
56646 }
56647
56648 #[simd_test(enable = "avx512f")]
56649 unsafe fn test_mm512_mask_reduce_or_epi32() {
56650 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56651 let e: i32 = _mm512_mask_reduce_or_epi32(0b11111111_00000000, a);
56652 assert_eq!(1, e);
56653 }
56654
56655 #[simd_test(enable = "avx512f")]
56656 unsafe fn test_mm512_mask_compress_epi32() {
56657 let src = _mm512_set1_epi32(200);
56658 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56659 let r = _mm512_mask_compress_epi32(src, 0, a);
56660 assert_eq_m512i(r, src);
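// The mask selects the even-numbered elements; they are packed into the low lanes and the remaining lanes are copied from `src`.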
56661 let r = _mm512_mask_compress_epi32(src, 0b01010101_01010101, a);
56662 let e = _mm512_set_epi32(
56663 200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
56664 );
56665 assert_eq_m512i(r, e);
56666 }
56667
56668 #[simd_test(enable = "avx512f")]
56669 unsafe fn test_mm512_maskz_compress_epi32() {
56670 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56671 let r = _mm512_maskz_compress_epi32(0, a);
56672 assert_eq_m512i(r, _mm512_setzero_si512());
56673 let r = _mm512_maskz_compress_epi32(0b01010101_01010101, a);
56674 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
56675 assert_eq_m512i(r, e);
56676 }
56677
56678 #[simd_test(enable = "avx512f,avx512vl")]
56679 unsafe fn test_mm256_mask_compress_epi32() {
56680 let src = _mm256_set1_epi32(200);
56681 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56682 let r = _mm256_mask_compress_epi32(src, 0, a);
56683 assert_eq_m256i(r, src);
56684 let r = _mm256_mask_compress_epi32(src, 0b01010101, a);
56685 let e = _mm256_set_epi32(200, 200, 200, 200, 1, 3, 5, 7);
56686 assert_eq_m256i(r, e);
56687 }
56688
56689 #[simd_test(enable = "avx512f,avx512vl")]
56690 unsafe fn test_mm256_maskz_compress_epi32() {
56691 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56692 let r = _mm256_maskz_compress_epi32(0, a);
56693 assert_eq_m256i(r, _mm256_setzero_si256());
56694 let r = _mm256_maskz_compress_epi32(0b01010101, a);
56695 let e = _mm256_set_epi32(0, 0, 0, 0, 1, 3, 5, 7);
56696 assert_eq_m256i(r, e);
56697 }
56698
56699 #[simd_test(enable = "avx512f,avx512vl")]
56700 unsafe fn test_mm_mask_compress_epi32() {
56701 let src = _mm_set1_epi32(200);
56702 let a = _mm_set_epi32(0, 1, 2, 3);
56703 let r = _mm_mask_compress_epi32(src, 0, a);
56704 assert_eq_m128i(r, src);
56705 let r = _mm_mask_compress_epi32(src, 0b00000101, a);
56706 let e = _mm_set_epi32(200, 200, 1, 3);
56707 assert_eq_m128i(r, e);
56708 }
56709
56710 #[simd_test(enable = "avx512f,avx512vl")]
56711 unsafe fn test_mm_maskz_compress_epi32() {
56712 let a = _mm_set_epi32(0, 1, 2, 3);
56713 let r = _mm_maskz_compress_epi32(0, a);
56714 assert_eq_m128i(r, _mm_setzero_si128());
56715 let r = _mm_maskz_compress_epi32(0b00000101, a);
56716 let e = _mm_set_epi32(0, 0, 1, 3);
56717 assert_eq_m128i(r, e);
56718 }
56719
56720 #[simd_test(enable = "avx512f")]
56721 unsafe fn test_mm512_mask_compress_ps() {
56722 let src = _mm512_set1_ps(200.);
56723 let a = _mm512_set_ps(
56724 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56725 );
56726 let r = _mm512_mask_compress_ps(src, 0, a);
56727 assert_eq_m512(r, src);
56728 let r = _mm512_mask_compress_ps(src, 0b01010101_01010101, a);
56729 let e = _mm512_set_ps(
56730 200., 200., 200., 200., 200., 200., 200., 200., 1., 3., 5., 7., 9., 11., 13., 15.,
56731 );
56732 assert_eq_m512(r, e);
56733 }
56734
56735 #[simd_test(enable = "avx512f")]
56736 unsafe fn test_mm512_maskz_compress_ps() {
56737 let a = _mm512_set_ps(
56738 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56739 );
56740 let r = _mm512_maskz_compress_ps(0, a);
56741 assert_eq_m512(r, _mm512_setzero_ps());
56742 let r = _mm512_maskz_compress_ps(0b01010101_01010101, a);
56743 let e = _mm512_set_ps(
56744 0., 0., 0., 0., 0., 0., 0., 0., 1., 3., 5., 7., 9., 11., 13., 15.,
56745 );
56746 assert_eq_m512(r, e);
56747 }
56748
56749 #[simd_test(enable = "avx512f,avx512vl")]
56750 unsafe fn test_mm256_mask_compress_ps() {
56751 let src = _mm256_set1_ps(200.);
56752 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56753 let r = _mm256_mask_compress_ps(src, 0, a);
56754 assert_eq_m256(r, src);
56755 let r = _mm256_mask_compress_ps(src, 0b01010101, a);
56756 let e = _mm256_set_ps(200., 200., 200., 200., 1., 3., 5., 7.);
56757 assert_eq_m256(r, e);
56758 }
56759
56760 #[simd_test(enable = "avx512f,avx512vl")]
56761 unsafe fn test_mm256_maskz_compress_ps() {
56762 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56763 let r = _mm256_maskz_compress_ps(0, a);
56764 assert_eq_m256(r, _mm256_setzero_ps());
56765 let r = _mm256_maskz_compress_ps(0b01010101, a);
56766 let e = _mm256_set_ps(0., 0., 0., 0., 1., 3., 5., 7.);
56767 assert_eq_m256(r, e);
56768 }
56769
56770 #[simd_test(enable = "avx512f,avx512vl")]
56771 unsafe fn test_mm_mask_compress_ps() {
56772 let src = _mm_set1_ps(200.);
56773 let a = _mm_set_ps(0., 1., 2., 3.);
56774 let r = _mm_mask_compress_ps(src, 0, a);
56775 assert_eq_m128(r, src);
56776 let r = _mm_mask_compress_ps(src, 0b00000101, a);
56777 let e = _mm_set_ps(200., 200., 1., 3.);
56778 assert_eq_m128(r, e);
56779 }
56780
56781 #[simd_test(enable = "avx512f,avx512vl")]
56782 unsafe fn test_mm_maskz_compress_ps() {
56783 let a = _mm_set_ps(0., 1., 2., 3.);
56784 let r = _mm_maskz_compress_ps(0, a);
56785 assert_eq_m128(r, _mm_setzero_ps());
56786 let r = _mm_maskz_compress_ps(0b00000101, a);
56787 let e = _mm_set_ps(0., 0., 1., 3.);
56788 assert_eq_m128(r, e);
56789 }
56790
56791 #[simd_test(enable = "avx512f")]
56792 unsafe fn test_mm512_mask_compressstoreu_epi32() {
56793 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56794 let mut r = [0_i32; 16];
56795 _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
56796 assert_eq!(&r, &[0_i32; 16]);
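// Only the elements whose mask bit is set are stored, packed contiguously from the start of `r`; the tail is left untouched.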
56797 _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1111000011001010, a);
56798 assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
56799 }
56800
56801 #[simd_test(enable = "avx512f,avx512vl")]
56802 unsafe fn test_mm256_mask_compressstoreu_epi32() {
56803 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56804 let mut r = [0_i32; 8];
56805 _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
56806 assert_eq!(&r, &[0_i32; 8]);
56807 _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b11001010, a);
56808 assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
56809 }
56810
56811 #[simd_test(enable = "avx512f,avx512vl")]
56812 unsafe fn test_mm_mask_compressstoreu_epi32() {
56813 let a = _mm_setr_epi32(1, 2, 3, 4);
56814 let mut r = [0_i32; 4];
56815 _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
56816 assert_eq!(&r, &[0_i32; 4]);
56817 _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1011, a);
56818 assert_eq!(&r, &[1, 2, 4, 0]);
56819 }
56820
56821 #[simd_test(enable = "avx512f")]
56822 unsafe fn test_mm512_mask_compressstoreu_epi64() {
56823 let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
56824 let mut r = [0_i64; 8];
56825 _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
56826 assert_eq!(&r, &[0_i64; 8]);
56827 _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b11001010, a);
56828 assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
56829 }
56830
56831 #[simd_test(enable = "avx512f,avx512vl")]
56832 unsafe fn test_mm256_mask_compressstoreu_epi64() {
56833 let a = _mm256_setr_epi64x(1, 2, 3, 4);
56834 let mut r = [0_i64; 4];
56835 _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
56836 assert_eq!(&r, &[0_i64; 4]);
56837 _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b1011, a);
56838 assert_eq!(&r, &[1, 2, 4, 0]);
56839 }
56840
56841 #[simd_test(enable = "avx512f,avx512vl")]
56842 unsafe fn test_mm_mask_compressstoreu_epi64() {
56843 let a = _mm_setr_epi64x(1, 2);
56844 let mut r = [0_i64; 2];
56845 _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
56846 assert_eq!(&r, &[0_i64; 2]);
56847 _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b10, a);
56848 assert_eq!(&r, &[2, 0]);
56849 }
56850
56851 #[simd_test(enable = "avx512f")]
56852 unsafe fn test_mm512_mask_compressstoreu_ps() {
56853 let a = _mm512_setr_ps(
56854 1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32, 9_f32, 10_f32, 11_f32, 12_f32,
56855 13_f32, 14_f32, 15_f32, 16_f32,
56856 );
56857 let mut r = [0_f32; 16];
56858 _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
56859 assert_eq!(&r, &[0_f32; 16]);
56860 _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1111000011001010, a);
56861 assert_eq!(
56862 &r,
56863 &[
56864 2_f32, 4_f32, 7_f32, 8_f32, 13_f32, 14_f32, 15_f32, 16_f32, 0_f32, 0_f32, 0_f32,
56865 0_f32, 0_f32, 0_f32, 0_f32, 0_f32
56866 ]
56867 );
56868 }
56869
56870 #[simd_test(enable = "avx512f,avx512vl")]
56871 unsafe fn test_mm256_mask_compressstoreu_ps() {
56872 let a = _mm256_setr_ps(1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32);
56873 let mut r = [0_f32; 8];
56874 _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
56875 assert_eq!(&r, &[0_f32; 8]);
56876 _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0b11001010, a);
56877 assert_eq!(
56878 &r,
56879 &[2_f32, 4_f32, 7_f32, 8_f32, 0_f32, 0_f32, 0_f32, 0_f32]
56880 );
56881 }
56882
56883 #[simd_test(enable = "avx512f,avx512vl")]
56884 unsafe fn test_mm_mask_compressstoreu_ps() {
56885 let a = _mm_setr_ps(1_f32, 2_f32, 3_f32, 4_f32);
56886 let mut r = [0.; 4];
56887 _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
56888 assert_eq!(&r, &[0.; 4]);
56889 _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1011, a);
56890 assert_eq!(&r, &[1_f32, 2_f32, 4_f32, 0_f32]);
56891 }
56892
56893 #[simd_test(enable = "avx512f")]
56894 unsafe fn test_mm512_mask_compressstoreu_pd() {
56895 let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
56896 let mut r = [0.; 8];
56897 _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
56898 assert_eq!(&r, &[0.; 8]);
56899 _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0b11001010, a);
56900 assert_eq!(&r, &[2., 4., 7., 8., 0., 0., 0., 0.]);
56901 }
56902
56903 #[simd_test(enable = "avx512f,avx512vl")]
56904 unsafe fn test_mm256_mask_compressstoreu_pd() {
56905 let a = _mm256_setr_pd(1., 2., 3., 4.);
56906 let mut r = [0.; 4];
56907 _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
56908 assert_eq!(&r, &[0.; 4]);
56909 _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0b1011, a);
56910 assert_eq!(&r, &[1., 2., 4., 0.]);
56911 }
56912
56913 #[simd_test(enable = "avx512f,avx512vl")]
56914 unsafe fn test_mm_mask_compressstoreu_pd() {
56915 let a = _mm_setr_pd(1., 2.);
56916 let mut r = [0.; 2];
56917 _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
56918 assert_eq!(&r, &[0.; 2]);
56919 _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0b10, a);
56920 assert_eq!(&r, &[2., 0.]);
56921 }
56922
56923 #[simd_test(enable = "avx512f")]
56924 unsafe fn test_mm512_mask_expand_epi32() {
56925 let src = _mm512_set1_epi32(200);
56926 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56927 let r = _mm512_mask_expand_epi32(src, 0, a);
56928 assert_eq_m512i(r, src);
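// Expand reads consecutive elements from the low end of `a` and scatters them to the lanes whose mask bit is set; other lanes are copied from `src`.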
56929 let r = _mm512_mask_expand_epi32(src, 0b01010101_01010101, a);
56930 let e = _mm512_set_epi32(
56931 200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
56932 );
56933 assert_eq_m512i(r, e);
56934 }
56935
56936 #[simd_test(enable = "avx512f")]
56937 unsafe fn test_mm512_maskz_expand_epi32() {
56938 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56939 let r = _mm512_maskz_expand_epi32(0, a);
56940 assert_eq_m512i(r, _mm512_setzero_si512());
56941 let r = _mm512_maskz_expand_epi32(0b01010101_01010101, a);
56942 let e = _mm512_set_epi32(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
56943 assert_eq_m512i(r, e);
56944 }
56945
56946 #[simd_test(enable = "avx512f,avx512vl")]
56947 unsafe fn test_mm256_mask_expand_epi32() {
56948 let src = _mm256_set1_epi32(200);
56949 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56950 let r = _mm256_mask_expand_epi32(src, 0, a);
56951 assert_eq_m256i(r, src);
56952 let r = _mm256_mask_expand_epi32(src, 0b01010101, a);
56953 let e = _mm256_set_epi32(200, 4, 200, 5, 200, 6, 200, 7);
56954 assert_eq_m256i(r, e);
56955 }
56956
56957 #[simd_test(enable = "avx512f,avx512vl")]
56958 unsafe fn test_mm256_maskz_expand_epi32() {
56959 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56960 let r = _mm256_maskz_expand_epi32(0, a);
56961 assert_eq_m256i(r, _mm256_setzero_si256());
56962 let r = _mm256_maskz_expand_epi32(0b01010101, a);
56963 let e = _mm256_set_epi32(0, 4, 0, 5, 0, 6, 0, 7);
56964 assert_eq_m256i(r, e);
56965 }
56966
56967 #[simd_test(enable = "avx512f,avx512vl")]
56968 unsafe fn test_mm_mask_expand_epi32() {
56969 let src = _mm_set1_epi32(200);
56970 let a = _mm_set_epi32(0, 1, 2, 3);
56971 let r = _mm_mask_expand_epi32(src, 0, a);
56972 assert_eq_m128i(r, src);
56973 let r = _mm_mask_expand_epi32(src, 0b00000101, a);
56974 let e = _mm_set_epi32(200, 2, 200, 3);
56975 assert_eq_m128i(r, e);
56976 }
56977
56978 #[simd_test(enable = "avx512f,avx512vl")]
56979 unsafe fn test_mm_maskz_expand_epi32() {
56980 let a = _mm_set_epi32(0, 1, 2, 3);
56981 let r = _mm_maskz_expand_epi32(0, a);
56982 assert_eq_m128i(r, _mm_setzero_si128());
56983 let r = _mm_maskz_expand_epi32(0b00000101, a);
56984 let e = _mm_set_epi32(0, 2, 0, 3);
56985 assert_eq_m128i(r, e);
56986 }
56987
56988 #[simd_test(enable = "avx512f")]
56989 unsafe fn test_mm512_mask_expand_ps() {
56990 let src = _mm512_set1_ps(200.);
56991 let a = _mm512_set_ps(
56992 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56993 );
56994 let r = _mm512_mask_expand_ps(src, 0, a);
56995 assert_eq_m512(r, src);
56996 let r = _mm512_mask_expand_ps(src, 0b01010101_01010101, a);
56997 let e = _mm512_set_ps(
56998 200., 8., 200., 9., 200., 10., 200., 11., 200., 12., 200., 13., 200., 14., 200., 15.,
56999 );
57000 assert_eq_m512(r, e);
57001 }
57002
57003 #[simd_test(enable = "avx512f")]
57004 unsafe fn test_mm512_maskz_expand_ps() {
57005 let a = _mm512_set_ps(
57006 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
57007 );
57008 let r = _mm512_maskz_expand_ps(0, a);
57009 assert_eq_m512(r, _mm512_setzero_ps());
57010 let r = _mm512_maskz_expand_ps(0b01010101_01010101, a);
57011 let e = _mm512_set_ps(
57012 0., 8., 0., 9., 0., 10., 0., 11., 0., 12., 0., 13., 0., 14., 0., 15.,
57013 );
57014 assert_eq_m512(r, e);
57015 }
57016
57017 #[simd_test(enable = "avx512f,avx512vl")]
57018 unsafe fn test_mm256_mask_expand_ps() {
57019 let src = _mm256_set1_ps(200.);
57020 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
57021 let r = _mm256_mask_expand_ps(src, 0, a);
57022 assert_eq_m256(r, src);
57023 let r = _mm256_mask_expand_ps(src, 0b01010101, a);
57024 let e = _mm256_set_ps(200., 4., 200., 5., 200., 6., 200., 7.);
57025 assert_eq_m256(r, e);
57026 }
57027
57028 #[simd_test(enable = "avx512f,avx512vl")]
57029 unsafe fn test_mm256_maskz_expand_ps() {
57030 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
57031 let r = _mm256_maskz_expand_ps(0, a);
57032 assert_eq_m256(r, _mm256_setzero_ps());
57033 let r = _mm256_maskz_expand_ps(0b01010101, a);
57034 let e = _mm256_set_ps(0., 4., 0., 5., 0., 6., 0., 7.);
57035 assert_eq_m256(r, e);
57036 }
57037
57038 #[simd_test(enable = "avx512f,avx512vl")]
57039 unsafe fn test_mm_mask_expand_ps() {
57040 let src = _mm_set1_ps(200.);
57041 let a = _mm_set_ps(0., 1., 2., 3.);
57042 let r = _mm_mask_expand_ps(src, 0, a);
57043 assert_eq_m128(r, src);
57044 let r = _mm_mask_expand_ps(src, 0b00000101, a);
57045 let e = _mm_set_ps(200., 2., 200., 3.);
57046 assert_eq_m128(r, e);
57047 }
57048
57049 #[simd_test(enable = "avx512f,avx512vl")]
57050 unsafe fn test_mm_maskz_expand_ps() {
57051 let a = _mm_set_ps(0., 1., 2., 3.);
57052 let r = _mm_maskz_expand_ps(0, a);
57053 assert_eq_m128(r, _mm_setzero_ps());
57054 let r = _mm_maskz_expand_ps(0b00000101, a);
57055 let e = _mm_set_ps(0., 2., 0., 3.);
57056 assert_eq_m128(r, e);
57057 }
57058
57059 #[simd_test(enable = "avx512f")]
57060 unsafe fn test_mm512_loadu_epi32() {
57061 let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
57062 let p = a.as_ptr();
57063 let r = _mm512_loadu_epi32(black_box(p));
57064 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57065 assert_eq_m512i(r, e);
57066 }
57067
57068 #[simd_test(enable = "avx512f,avx512vl")]
57069 unsafe fn test_mm256_loadu_epi32() {
57070 let a = &[4, 3, 2, 5, 8, 9, 64, 50];
57071 let p = a.as_ptr();
57072 let r = _mm256_loadu_epi32(black_box(p));
57073 let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
57074 assert_eq_m256i(r, e);
57075 }
57076
57077 #[simd_test(enable = "avx512f,avx512vl")]
57078 unsafe fn test_mm_loadu_epi32() {
57079 let a = &[4, 3, 2, 5];
57080 let p = a.as_ptr();
57081 let r = _mm_loadu_epi32(black_box(p));
57082 let e = _mm_setr_epi32(4, 3, 2, 5);
57083 assert_eq_m128i(r, e);
57084 }
57085
57086 #[simd_test(enable = "avx512f")]
57087 unsafe fn test_mm512_mask_cvtepi32_storeu_epi16() {
57088 let a = _mm512_set1_epi32(9);
57089 let mut r = _mm256_undefined_si256();
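// Each selected 32-bit lane is truncated to 16 bits before being stored.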
57090 _mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
57091 let e = _mm256_set1_epi16(9);
57092 assert_eq_m256i(r, e);
57093 }
57094
57095 #[simd_test(enable = "avx512f,avx512vl")]
57096 unsafe fn test_mm256_mask_cvtepi32_storeu_epi16() {
57097 let a = _mm256_set1_epi32(9);
57098 let mut r = _mm_undefined_si128();
57099 _mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57100 let e = _mm_set1_epi16(9);
57101 assert_eq_m128i(r, e);
57102 }
57103
57104 #[simd_test(enable = "avx512f,avx512vl")]
57105 unsafe fn test_mm_mask_cvtepi32_storeu_epi16() {
57106 let a = _mm_set1_epi32(9);
57107 let mut r = _mm_set1_epi8(0);
57108 _mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57109 let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
57110 assert_eq_m128i(r, e);
57111 }
57112
57113 #[simd_test(enable = "avx512f")]
57114 unsafe fn test_mm512_mask_cvtsepi32_storeu_epi16() {
57115 let a = _mm512_set1_epi32(i32::MAX);
57116 let mut r = _mm256_undefined_si256();
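// Signed saturation: each i32::MAX lane is stored as i16::MAX.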
57117 _mm512_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
57118 let e = _mm256_set1_epi16(i16::MAX);
57119 assert_eq_m256i(r, e);
57120 }
57121
57122 #[simd_test(enable = "avx512f,avx512vl")]
57123 unsafe fn test_mm256_mask_cvtsepi32_storeu_epi16() {
57124 let a = _mm256_set1_epi32(i32::MAX);
57125 let mut r = _mm_undefined_si128();
57126 _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57127 let e = _mm_set1_epi16(i16::MAX);
57128 assert_eq_m128i(r, e);
57129 }
57130
57131 #[simd_test(enable = "avx512f,avx512vl")]
57132 unsafe fn test_mm_mask_cvtsepi32_storeu_epi16() {
57133 let a = _mm_set1_epi32(i32::MAX);
57134 let mut r = _mm_set1_epi8(0);
57135 _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57136 let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
57137 assert_eq_m128i(r, e);
57138 }
57139
57140 #[simd_test(enable = "avx512f")]
57141 unsafe fn test_mm512_mask_cvtusepi32_storeu_epi16() {
57142 let a = _mm512_set1_epi32(i32::MAX);
57143 let mut r = _mm256_undefined_si256();
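// Unsigned saturation: each i32::MAX lane is stored as u16::MAX.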
57144 _mm512_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
57145 let e = _mm256_set1_epi16(u16::MAX as i16);
57146 assert_eq_m256i(r, e);
57147 }
57148
57149 #[simd_test(enable = "avx512f,avx512vl")]
57150 unsafe fn test_mm256_mask_cvtusepi32_storeu_epi16() {
57151 let a = _mm256_set1_epi32(i32::MAX);
57152 let mut r = _mm_undefined_si128();
57153 _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57154 let e = _mm_set1_epi16(u16::MAX as i16);
57155 assert_eq_m128i(r, e);
57156 }
57157
57158 #[simd_test(enable = "avx512f,avx512vl")]
57159 unsafe fn test_mm_mask_cvtusepi32_storeu_epi16() {
57160 let a = _mm_set1_epi32(i32::MAX);
57161 let mut r = _mm_set1_epi8(0);
57162 _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57163 let e = _mm_set_epi16(
57164 0,
57165 0,
57166 0,
57167 0,
57168 u16::MAX as i16,
57169 u16::MAX as i16,
57170 u16::MAX as i16,
57171 u16::MAX as i16,
57172 );
57173 assert_eq_m128i(r, e);
57174 }
57175
57176 #[simd_test(enable = "avx512f")]
57177 unsafe fn test_mm512_mask_cvtepi32_storeu_epi8() {
57178 let a = _mm512_set1_epi32(9);
57179 let mut r = _mm_undefined_si128();
57180 _mm512_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57181 let e = _mm_set1_epi8(9);
57182 assert_eq_m128i(r, e);
57183 }
57184
57185 #[simd_test(enable = "avx512f,avx512vl")]
57186 unsafe fn test_mm256_mask_cvtepi32_storeu_epi8() {
57187 let a = _mm256_set1_epi32(9);
57188 let mut r = _mm_set1_epi8(0);
57189 _mm256_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57190 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9);
57191 assert_eq_m128i(r, e);
57192 }
57193
57194 #[simd_test(enable = "avx512f,avx512vl")]
57195 unsafe fn test_mm_mask_cvtepi32_storeu_epi8() {
57196 let a = _mm_set1_epi32(9);
57197 let mut r = _mm_set1_epi8(0);
57198 _mm_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57199 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9);
57200 assert_eq_m128i(r, e);
57201 }
57202
57203 #[simd_test(enable = "avx512f")]
57204 unsafe fn test_mm512_mask_cvtsepi32_storeu_epi8() {
57205 let a = _mm512_set1_epi32(i32::MAX);
57206 let mut r = _mm_undefined_si128();
57207 _mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57208 let e = _mm_set1_epi8(i8::MAX);
57209 assert_eq_m128i(r, e);
57210 }
57211
57212 #[simd_test(enable = "avx512f,avx512vl")]
57213 unsafe fn test_mm256_mask_cvtsepi32_storeu_epi8() {
57214 let a = _mm256_set1_epi32(i32::MAX);
57215 let mut r = _mm_set1_epi8(0);
57216 _mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57217 #[rustfmt::skip]
57218 let e = _mm_set_epi8(
57219 0, 0, 0, 0,
57220 0, 0, 0, 0,
57221 i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57222 i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57223 );
57224 assert_eq_m128i(r, e);
57225 }
57226
57227 #[simd_test(enable = "avx512f,avx512vl")]
57228 unsafe fn test_mm_mask_cvtsepi32_storeu_epi8() {
57229 let a = _mm_set1_epi32(i32::MAX);
57230 let mut r = _mm_set1_epi8(0);
57231 _mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57232 #[rustfmt::skip]
57233 let e = _mm_set_epi8(
57234 0, 0, 0, 0,
57235 0, 0, 0, 0,
57236 0, 0, 0, 0,
57237 i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57238 );
57239 assert_eq_m128i(r, e);
57240 }
57241
57242 #[simd_test(enable = "avx512f")]
57243 unsafe fn test_mm512_mask_cvtusepi32_storeu_epi8() {
57244 let a = _mm512_set1_epi32(i32::MAX);
57245 let mut r = _mm_undefined_si128();
57246 _mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57247 let e = _mm_set1_epi8(u8::MAX as i8);
57248 assert_eq_m128i(r, e);
57249 }
57250
57251 #[simd_test(enable = "avx512f,avx512vl")]
57252 unsafe fn test_mm256_mask_cvtusepi32_storeu_epi8() {
57253 let a = _mm256_set1_epi32(i32::MAX);
57254 let mut r = _mm_set1_epi8(0);
57255 _mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57256 #[rustfmt::skip]
57257 let e = _mm_set_epi8(
57258 0, 0, 0, 0,
57259 0, 0, 0, 0,
57260 u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57261 u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57262 );
57263 assert_eq_m128i(r, e);
57264 }
57265
57266 #[simd_test(enable = "avx512f,avx512vl")]
57267 unsafe fn test_mm_mask_cvtusepi32_storeu_epi8() {
57268 let a = _mm_set1_epi32(i32::MAX);
57269 let mut r = _mm_set1_epi8(0);
57270 _mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57271 #[rustfmt::skip]
57272 let e = _mm_set_epi8(
57273 0, 0, 0, 0,
57274 0, 0, 0, 0,
57275 0, 0, 0, 0,
57276 u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57277 );
57278 assert_eq_m128i(r, e);
57279 }
57280
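// The storeu tests write every lane, so starting from an undefined buffer is fine: the store
// fully overwrites it before the comparison.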
57281 #[simd_test(enable = "avx512f")]
57282 unsafe fn test_mm512_storeu_epi32() {
57283 let a = _mm512_set1_epi32(9);
57284 let mut r = _mm512_undefined_epi32();
57285 _mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57286 assert_eq_m512i(r, a);
57287 }
57288
57289 #[simd_test(enable = "avx512f,avx512vl")]
57290 unsafe fn test_mm256_storeu_epi32() {
57291 let a = _mm256_set1_epi32(9);
57292 let mut r = _mm256_undefined_si256();
57293 _mm256_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57294 assert_eq_m256i(r, a);
57295 }
57296
57297 #[simd_test(enable = "avx512f,avx512vl")]
57298 unsafe fn test_mm_storeu_epi32() {
57299 let a = _mm_set1_epi32(9);
57300 let mut r = _mm_undefined_si128();
57301 _mm_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57302 assert_eq_m128i(r, a);
57303 }
57304
57305 #[simd_test(enable = "avx512f")]
57306 unsafe fn test_mm512_loadu_si512() {
57307 let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
57308 let p = a.as_ptr().cast();
57309 let r = _mm512_loadu_si512(black_box(p));
57310 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57311 assert_eq_m512i(r, e);
57312 }
57313
57314 #[simd_test(enable = "avx512f")]
57315 unsafe fn test_mm512_storeu_si512() {
57316 let a = _mm512_set1_epi32(9);
57317 let mut r = _mm512_undefined_epi32();
57318 _mm512_storeu_si512(&mut r as *mut _, a);
57319 assert_eq_m512i(r, a);
57320 }
57321
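// The aligned load/store intrinsics below require the data to be aligned to the vector width;
// the load tests use #[repr(align(64))] wrappers, while the store tests rely on the alignment
// of the vector types themselves.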
57322 #[simd_test(enable = "avx512f")]
57323 unsafe fn test_mm512_load_si512() {
57324 #[repr(align(64))]
57325 struct Align {
57326 data: [i32; 16], // 64 bytes
57327 }
57328 let a = Align {
57329 data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
57330 };
57331 let p = (a.data).as_ptr().cast();
57332 let r = _mm512_load_si512(black_box(p));
57333 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57334 assert_eq_m512i(r, e);
57335 }
57336
57337 #[simd_test(enable = "avx512f")]
57338 unsafe fn test_mm512_store_si512() {
57339 let a = _mm512_set1_epi32(9);
57340 let mut r = _mm512_undefined_epi32();
57341 _mm512_store_si512(&mut r as *mut _, a);
57342 assert_eq_m512i(r, a);
57343 }
57344
57345 #[simd_test(enable = "avx512f")]
57346 unsafe fn test_mm512_load_epi32() {
57347 #[repr(align(64))]
57348 struct Align {
57349 data: [i32; 16], // 64 bytes
57350 }
57351 let a = Align {
57352 data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
57353 };
57354 let p = (a.data).as_ptr();
57355 let r = _mm512_load_epi32(black_box(p));
57356 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57357 assert_eq_m512i(r, e);
57358 }
57359
57360 #[simd_test(enable = "avx512f,avx512vl")]
57361 unsafe fn test_mm256_load_epi32() {
57362 #[repr(align(64))]
57363 struct Align {
57364 data: [i32; 8],
57365 }
57366 let a = Align {
57367 data: [4, 3, 2, 5, 8, 9, 64, 50],
57368 };
57369 let p = (a.data).as_ptr();
57370 let r = _mm256_load_epi32(black_box(p));
57371 let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
57372 assert_eq_m256i(r, e);
57373 }
57374
57375 #[simd_test(enable = "avx512f,avx512vl")]
57376 unsafe fn test_mm_load_epi32() {
57377 #[repr(align(64))]
57378 struct Align {
57379 data: [i32; 4],
57380 }
57381 let a = Align { data: [4, 3, 2, 5] };
57382 let p = (a.data).as_ptr();
57383 let r = _mm_load_epi32(black_box(p));
57384 let e = _mm_setr_epi32(4, 3, 2, 5);
57385 assert_eq_m128i(r, e);
57386 }
57387
57388 #[simd_test(enable = "avx512f")]
57389 unsafe fn test_mm512_store_epi32() {
57390 let a = _mm512_set1_epi32(9);
57391 let mut r = _mm512_undefined_epi32();
57392 _mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
57393 assert_eq_m512i(r, a);
57394 }
57395
57396 #[simd_test(enable = "avx512f,avx512vl")]
57397 unsafe fn test_mm256_store_epi32() {
57398 let a = _mm256_set1_epi32(9);
57399 let mut r = _mm256_undefined_si256();
57400 _mm256_store_epi32(&mut r as *mut _ as *mut i32, a);
57401 assert_eq_m256i(r, a);
57402 }
57403
57404 #[simd_test(enable = "avx512f,avx512vl")]
57405 unsafe fn test_mm_store_epi32() {
57406 let a = _mm_set1_epi32(9);
57407 let mut r = _mm_undefined_si128();
57408 _mm_store_epi32(&mut r as *mut _ as *mut i32, a);
57409 assert_eq_m128i(r, a);
57410 }
57411
57412 #[simd_test(enable = "avx512f")]
57413 unsafe fn test_mm512_load_ps() {
57414 #[repr(align(64))]
57415 struct Align {
57416 data: [f32; 16], // 64 bytes
57417 }
57418 let a = Align {
57419 data: [
57420 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
57421 ],
57422 };
57423 let p = (a.data).as_ptr();
57424 let r = _mm512_load_ps(black_box(p));
57425 let e = _mm512_setr_ps(
57426 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
57427 );
57428 assert_eq_m512(r, e);
57429 }
57430
57431 #[simd_test(enable = "avx512f")]
57432 unsafe fn test_mm512_store_ps() {
57433 let a = _mm512_set1_ps(9.);
57434 let mut r = _mm512_undefined_ps();
57435 _mm512_store_ps(&mut r as *mut _ as *mut f32, a);
57436 assert_eq_m512(r, a);
57437 }
57438
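// mask_set1 broadcasts `a` only into lanes whose mask bit is set; the remaining lanes keep
// `src` for the writemask form and are zeroed for the maskz form.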
57439 #[simd_test(enable = "avx512f")]
57440 unsafe fn test_mm512_mask_set1_epi32() {
57441 let src = _mm512_set1_epi32(2);
57442 let a: i32 = 11;
57443 let r = _mm512_mask_set1_epi32(src, 0, a);
57444 assert_eq_m512i(r, src);
57445 let r = _mm512_mask_set1_epi32(src, 0b11111111_11111111, a);
57446 let e = _mm512_set1_epi32(11);
57447 assert_eq_m512i(r, e);
57448 }
57449
57450 #[simd_test(enable = "avx512f")]
57451 unsafe fn test_mm512_maskz_set1_epi32() {
57452 let a: i32 = 11;
57453 let r = _mm512_maskz_set1_epi32(0, a);
57454 assert_eq_m512i(r, _mm512_setzero_si512());
57455 let r = _mm512_maskz_set1_epi32(0b11111111_11111111, a);
57456 let e = _mm512_set1_epi32(11);
57457 assert_eq_m512i(r, e);
57458 }
57459
57460 #[simd_test(enable = "avx512f,avx512vl")]
57461 unsafe fn test_mm256_mask_set1_epi32() {
57462 let src = _mm256_set1_epi32(2);
57463 let a: i32 = 11;
57464 let r = _mm256_mask_set1_epi32(src, 0, a);
57465 assert_eq_m256i(r, src);
57466 let r = _mm256_mask_set1_epi32(src, 0b11111111, a);
57467 let e = _mm256_set1_epi32(11);
57468 assert_eq_m256i(r, e);
57469 }
57470
#[simd_test(enable = "avx512f,avx512vl")]
57472 unsafe fn test_mm256_maskz_set1_epi32() {
57473 let a: i32 = 11;
57474 let r = _mm256_maskz_set1_epi32(0, a);
57475 assert_eq_m256i(r, _mm256_setzero_si256());
57476 let r = _mm256_maskz_set1_epi32(0b11111111, a);
57477 let e = _mm256_set1_epi32(11);
57478 assert_eq_m256i(r, e);
57479 }
57480
57481 #[simd_test(enable = "avx512f,avx512vl")]
57482 unsafe fn test_mm_mask_set1_epi32() {
57483 let src = _mm_set1_epi32(2);
57484 let a: i32 = 11;
57485 let r = _mm_mask_set1_epi32(src, 0, a);
57486 assert_eq_m128i(r, src);
57487 let r = _mm_mask_set1_epi32(src, 0b00001111, a);
57488 let e = _mm_set1_epi32(11);
57489 assert_eq_m128i(r, e);
57490 }
57491
#[simd_test(enable = "avx512f,avx512vl")]
57493 unsafe fn test_mm_maskz_set1_epi32() {
57494 let a: i32 = 11;
57495 let r = _mm_maskz_set1_epi32(0, a);
57496 assert_eq_m128i(r, _mm_setzero_si128());
57497 let r = _mm_maskz_set1_epi32(0b00001111, a);
57498 let e = _mm_set1_epi32(11);
57499 assert_eq_m128i(r, e);
57500 }
57501
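// For the masked scalar move below, only lane 0 is selected: it becomes b[0] when mask bit 0
// is set and src[0] (or 0.0 for maskz) otherwise, while lanes 1..=3 always come from `a`.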
57502 #[simd_test(enable = "avx512f")]
57503 unsafe fn test_mm_mask_move_ss() {
57504 let src = _mm_set_ps(10., 11., 100., 110.);
57505 let a = _mm_set_ps(1., 2., 10., 20.);
57506 let b = _mm_set_ps(3., 4., 30., 40.);
57507 let r = _mm_mask_move_ss(src, 0, a, b);
57508 let e = _mm_set_ps(1., 2., 10., 110.);
57509 assert_eq_m128(r, e);
57510 let r = _mm_mask_move_ss(src, 0b11111111, a, b);
57511 let e = _mm_set_ps(1., 2., 10., 40.);
57512 assert_eq_m128(r, e);
57513 }
57514
57515 #[simd_test(enable = "avx512f")]
57516 unsafe fn test_mm_maskz_move_ss() {
57517 let a = _mm_set_ps(1., 2., 10., 20.);
57518 let b = _mm_set_ps(3., 4., 30., 40.);
57519 let r = _mm_maskz_move_ss(0, a, b);
57520 let e = _mm_set_ps(1., 2., 10., 0.);
57521 assert_eq_m128(r, e);
57522 let r = _mm_maskz_move_ss(0b11111111, a, b);
57523 let e = _mm_set_ps(1., 2., 10., 40.);
57524 assert_eq_m128(r, e);
57525 }
57526
57527 #[simd_test(enable = "avx512f")]
57528 unsafe fn test_mm_mask_move_sd() {
57529 let src = _mm_set_pd(10., 11.);
57530 let a = _mm_set_pd(1., 2.);
57531 let b = _mm_set_pd(3., 4.);
57532 let r = _mm_mask_move_sd(src, 0, a, b);
57533 let e = _mm_set_pd(1., 11.);
57534 assert_eq_m128d(r, e);
57535 let r = _mm_mask_move_sd(src, 0b11111111, a, b);
57536 let e = _mm_set_pd(1., 4.);
57537 assert_eq_m128d(r, e);
57538 }
57539
57540 #[simd_test(enable = "avx512f")]
57541 unsafe fn test_mm_maskz_move_sd() {
57542 let a = _mm_set_pd(1., 2.);
57543 let b = _mm_set_pd(3., 4.);
57544 let r = _mm_maskz_move_sd(0, a, b);
57545 let e = _mm_set_pd(1., 0.);
57546 assert_eq_m128d(r, e);
57547 let r = _mm_maskz_move_sd(0b11111111, a, b);
57548 let e = _mm_set_pd(1., 4.);
57549 assert_eq_m128d(r, e);
57550 }
57551
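// The masked scalar add/sub/mul/div tests all follow the same pattern: lane 0 is
// a[0] <op> b[0] when mask bit 0 is set, otherwise src[0] (writemask) or 0.0 (maskz);
// lanes 1..=3 are copied from `a`.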
57552 #[simd_test(enable = "avx512f")]
57553 unsafe fn test_mm_mask_add_ss() {
57554 let src = _mm_set_ps(10., 11., 100., 110.);
57555 let a = _mm_set_ps(1., 2., 10., 20.);
57556 let b = _mm_set_ps(3., 4., 30., 40.);
57557 let r = _mm_mask_add_ss(src, 0, a, b);
57558 let e = _mm_set_ps(1., 2., 10., 110.);
57559 assert_eq_m128(r, e);
57560 let r = _mm_mask_add_ss(src, 0b11111111, a, b);
57561 let e = _mm_set_ps(1., 2., 10., 60.);
57562 assert_eq_m128(r, e);
57563 }
57564
57565 #[simd_test(enable = "avx512f")]
57566 unsafe fn test_mm_maskz_add_ss() {
57567 let a = _mm_set_ps(1., 2., 10., 20.);
57568 let b = _mm_set_ps(3., 4., 30., 40.);
57569 let r = _mm_maskz_add_ss(0, a, b);
57570 let e = _mm_set_ps(1., 2., 10., 0.);
57571 assert_eq_m128(r, e);
57572 let r = _mm_maskz_add_ss(0b11111111, a, b);
57573 let e = _mm_set_ps(1., 2., 10., 60.);
57574 assert_eq_m128(r, e);
57575 }
57576
57577 #[simd_test(enable = "avx512f")]
57578 unsafe fn test_mm_mask_add_sd() {
57579 let src = _mm_set_pd(10., 11.);
57580 let a = _mm_set_pd(1., 2.);
57581 let b = _mm_set_pd(3., 4.);
57582 let r = _mm_mask_add_sd(src, 0, a, b);
57583 let e = _mm_set_pd(1., 11.);
57584 assert_eq_m128d(r, e);
57585 let r = _mm_mask_add_sd(src, 0b11111111, a, b);
57586 let e = _mm_set_pd(1., 6.);
57587 assert_eq_m128d(r, e);
57588 }
57589
57590 #[simd_test(enable = "avx512f")]
57591 unsafe fn test_mm_maskz_add_sd() {
57592 let a = _mm_set_pd(1., 2.);
57593 let b = _mm_set_pd(3., 4.);
57594 let r = _mm_maskz_add_sd(0, a, b);
57595 let e = _mm_set_pd(1., 0.);
57596 assert_eq_m128d(r, e);
57597 let r = _mm_maskz_add_sd(0b11111111, a, b);
57598 let e = _mm_set_pd(1., 6.);
57599 assert_eq_m128d(r, e);
57600 }
57601
57602 #[simd_test(enable = "avx512f")]
57603 unsafe fn test_mm_mask_sub_ss() {
57604 let src = _mm_set_ps(10., 11., 100., 110.);
57605 let a = _mm_set_ps(1., 2., 10., 20.);
57606 let b = _mm_set_ps(3., 4., 30., 40.);
57607 let r = _mm_mask_sub_ss(src, 0, a, b);
57608 let e = _mm_set_ps(1., 2., 10., 110.);
57609 assert_eq_m128(r, e);
57610 let r = _mm_mask_sub_ss(src, 0b11111111, a, b);
57611 let e = _mm_set_ps(1., 2., 10., -20.);
57612 assert_eq_m128(r, e);
57613 }
57614
57615 #[simd_test(enable = "avx512f")]
57616 unsafe fn test_mm_maskz_sub_ss() {
57617 let a = _mm_set_ps(1., 2., 10., 20.);
57618 let b = _mm_set_ps(3., 4., 30., 40.);
57619 let r = _mm_maskz_sub_ss(0, a, b);
57620 let e = _mm_set_ps(1., 2., 10., 0.);
57621 assert_eq_m128(r, e);
57622 let r = _mm_maskz_sub_ss(0b11111111, a, b);
57623 let e = _mm_set_ps(1., 2., 10., -20.);
57624 assert_eq_m128(r, e);
57625 }
57626
57627 #[simd_test(enable = "avx512f")]
57628 unsafe fn test_mm_mask_sub_sd() {
57629 let src = _mm_set_pd(10., 11.);
57630 let a = _mm_set_pd(1., 2.);
57631 let b = _mm_set_pd(3., 4.);
57632 let r = _mm_mask_sub_sd(src, 0, a, b);
57633 let e = _mm_set_pd(1., 11.);
57634 assert_eq_m128d(r, e);
57635 let r = _mm_mask_sub_sd(src, 0b11111111, a, b);
57636 let e = _mm_set_pd(1., -2.);
57637 assert_eq_m128d(r, e);
57638 }
57639
57640 #[simd_test(enable = "avx512f")]
57641 unsafe fn test_mm_maskz_sub_sd() {
57642 let a = _mm_set_pd(1., 2.);
57643 let b = _mm_set_pd(3., 4.);
57644 let r = _mm_maskz_sub_sd(0, a, b);
57645 let e = _mm_set_pd(1., 0.);
57646 assert_eq_m128d(r, e);
57647 let r = _mm_maskz_sub_sd(0b11111111, a, b);
57648 let e = _mm_set_pd(1., -2.);
57649 assert_eq_m128d(r, e);
57650 }
57651
57652 #[simd_test(enable = "avx512f")]
57653 unsafe fn test_mm_mask_mul_ss() {
57654 let src = _mm_set_ps(10., 11., 100., 110.);
57655 let a = _mm_set_ps(1., 2., 10., 20.);
57656 let b = _mm_set_ps(3., 4., 30., 40.);
57657 let r = _mm_mask_mul_ss(src, 0, a, b);
57658 let e = _mm_set_ps(1., 2., 10., 110.);
57659 assert_eq_m128(r, e);
57660 let r = _mm_mask_mul_ss(src, 0b11111111, a, b);
57661 let e = _mm_set_ps(1., 2., 10., 800.);
57662 assert_eq_m128(r, e);
57663 }
57664
57665 #[simd_test(enable = "avx512f")]
57666 unsafe fn test_mm_maskz_mul_ss() {
57667 let a = _mm_set_ps(1., 2., 10., 20.);
57668 let b = _mm_set_ps(3., 4., 30., 40.);
57669 let r = _mm_maskz_mul_ss(0, a, b);
57670 let e = _mm_set_ps(1., 2., 10., 0.);
57671 assert_eq_m128(r, e);
57672 let r = _mm_maskz_mul_ss(0b11111111, a, b);
57673 let e = _mm_set_ps(1., 2., 10., 800.);
57674 assert_eq_m128(r, e);
57675 }
57676
57677 #[simd_test(enable = "avx512f")]
57678 unsafe fn test_mm_mask_mul_sd() {
57679 let src = _mm_set_pd(10., 11.);
57680 let a = _mm_set_pd(1., 2.);
57681 let b = _mm_set_pd(3., 4.);
57682 let r = _mm_mask_mul_sd(src, 0, a, b);
57683 let e = _mm_set_pd(1., 11.);
57684 assert_eq_m128d(r, e);
57685 let r = _mm_mask_mul_sd(src, 0b11111111, a, b);
57686 let e = _mm_set_pd(1., 8.);
57687 assert_eq_m128d(r, e);
57688 }
57689
57690 #[simd_test(enable = "avx512f")]
57691 unsafe fn test_mm_maskz_mul_sd() {
57692 let a = _mm_set_pd(1., 2.);
57693 let b = _mm_set_pd(3., 4.);
57694 let r = _mm_maskz_mul_sd(0, a, b);
57695 let e = _mm_set_pd(1., 0.);
57696 assert_eq_m128d(r, e);
57697 let r = _mm_maskz_mul_sd(0b11111111, a, b);
57698 let e = _mm_set_pd(1., 8.);
57699 assert_eq_m128d(r, e);
57700 }
57701
57702 #[simd_test(enable = "avx512f")]
57703 unsafe fn test_mm_mask_div_ss() {
57704 let src = _mm_set_ps(10., 11., 100., 110.);
57705 let a = _mm_set_ps(1., 2., 10., 20.);
57706 let b = _mm_set_ps(3., 4., 30., 40.);
57707 let r = _mm_mask_div_ss(src, 0, a, b);
57708 let e = _mm_set_ps(1., 2., 10., 110.);
57709 assert_eq_m128(r, e);
57710 let r = _mm_mask_div_ss(src, 0b11111111, a, b);
57711 let e = _mm_set_ps(1., 2., 10., 0.5);
57712 assert_eq_m128(r, e);
57713 }
57714
57715 #[simd_test(enable = "avx512f")]
57716 unsafe fn test_mm_maskz_div_ss() {
57717 let a = _mm_set_ps(1., 2., 10., 20.);
57718 let b = _mm_set_ps(3., 4., 30., 40.);
57719 let r = _mm_maskz_div_ss(0, a, b);
57720 let e = _mm_set_ps(1., 2., 10., 0.);
57721 assert_eq_m128(r, e);
57722 let r = _mm_maskz_div_ss(0b11111111, a, b);
57723 let e = _mm_set_ps(1., 2., 10., 0.5);
57724 assert_eq_m128(r, e);
57725 }
57726
57727 #[simd_test(enable = "avx512f")]
57728 unsafe fn test_mm_mask_div_sd() {
57729 let src = _mm_set_pd(10., 11.);
57730 let a = _mm_set_pd(1., 2.);
57731 let b = _mm_set_pd(3., 4.);
57732 let r = _mm_mask_div_sd(src, 0, a, b);
57733 let e = _mm_set_pd(1., 11.);
57734 assert_eq_m128d(r, e);
57735 let r = _mm_mask_div_sd(src, 0b11111111, a, b);
57736 let e = _mm_set_pd(1., 0.5);
57737 assert_eq_m128d(r, e);
57738 }
57739
57740 #[simd_test(enable = "avx512f")]
57741 unsafe fn test_mm_maskz_div_sd() {
57742 let a = _mm_set_pd(1., 2.);
57743 let b = _mm_set_pd(3., 4.);
57744 let r = _mm_maskz_div_sd(0, a, b);
57745 let e = _mm_set_pd(1., 0.);
57746 assert_eq_m128d(r, e);
57747 let r = _mm_maskz_div_sd(0b11111111, a, b);
57748 let e = _mm_set_pd(1., 0.5);
57749 assert_eq_m128d(r, e);
57750 }
57751
57752 #[simd_test(enable = "avx512f")]
57753 unsafe fn test_mm_mask_max_ss() {
57754 let a = _mm_set_ps(0., 1., 2., 3.);
57755 let b = _mm_set_ps(4., 5., 6., 7.);
57756 let r = _mm_mask_max_ss(a, 0, a, b);
57757 let e = _mm_set_ps(0., 1., 2., 3.);
57758 assert_eq_m128(r, e);
57759 let r = _mm_mask_max_ss(a, 0b11111111, a, b);
57760 let e = _mm_set_ps(0., 1., 2., 7.);
57761 assert_eq_m128(r, e);
57762 }
57763
57764 #[simd_test(enable = "avx512f")]
57765 unsafe fn test_mm_maskz_max_ss() {
57766 let a = _mm_set_ps(0., 1., 2., 3.);
57767 let b = _mm_set_ps(4., 5., 6., 7.);
57768 let r = _mm_maskz_max_ss(0, a, b);
57769 let e = _mm_set_ps(0., 1., 2., 0.);
57770 assert_eq_m128(r, e);
57771 let r = _mm_maskz_max_ss(0b11111111, a, b);
57772 let e = _mm_set_ps(0., 1., 2., 7.);
57773 assert_eq_m128(r, e);
57774 }
57775
57776 #[simd_test(enable = "avx512f")]
57777 unsafe fn test_mm_mask_max_sd() {
57778 let a = _mm_set_pd(0., 1.);
57779 let b = _mm_set_pd(2., 3.);
57780 let r = _mm_mask_max_sd(a, 0, a, b);
57781 let e = _mm_set_pd(0., 1.);
57782 assert_eq_m128d(r, e);
57783 let r = _mm_mask_max_sd(a, 0b11111111, a, b);
57784 let e = _mm_set_pd(0., 3.);
57785 assert_eq_m128d(r, e);
57786 }
57787
57788 #[simd_test(enable = "avx512f")]
57789 unsafe fn test_mm_maskz_max_sd() {
57790 let a = _mm_set_pd(0., 1.);
57791 let b = _mm_set_pd(2., 3.);
57792 let r = _mm_maskz_max_sd(0, a, b);
57793 let e = _mm_set_pd(0., 0.);
57794 assert_eq_m128d(r, e);
57795 let r = _mm_maskz_max_sd(0b11111111, a, b);
57796 let e = _mm_set_pd(0., 3.);
57797 assert_eq_m128d(r, e);
57798 }
57799
57800 #[simd_test(enable = "avx512f")]
57801 unsafe fn test_mm_mask_min_ss() {
57802 let a = _mm_set_ps(0., 1., 2., 3.);
57803 let b = _mm_set_ps(4., 5., 6., 7.);
57804 let r = _mm_mask_min_ss(a, 0, a, b);
57805 let e = _mm_set_ps(0., 1., 2., 3.);
57806 assert_eq_m128(r, e);
57807 let r = _mm_mask_min_ss(a, 0b11111111, a, b);
57808 let e = _mm_set_ps(0., 1., 2., 3.);
57809 assert_eq_m128(r, e);
57810 }
57811
57812 #[simd_test(enable = "avx512f")]
57813 unsafe fn test_mm_maskz_min_ss() {
57814 let a = _mm_set_ps(0., 1., 2., 3.);
57815 let b = _mm_set_ps(4., 5., 6., 7.);
57816 let r = _mm_maskz_min_ss(0, a, b);
57817 let e = _mm_set_ps(0., 1., 2., 0.);
57818 assert_eq_m128(r, e);
57819 let r = _mm_maskz_min_ss(0b11111111, a, b);
57820 let e = _mm_set_ps(0., 1., 2., 3.);
57821 assert_eq_m128(r, e);
57822 }
57823
57824 #[simd_test(enable = "avx512f")]
57825 unsafe fn test_mm_mask_min_sd() {
57826 let a = _mm_set_pd(0., 1.);
57827 let b = _mm_set_pd(2., 3.);
57828 let r = _mm_mask_min_sd(a, 0, a, b);
57829 let e = _mm_set_pd(0., 1.);
57830 assert_eq_m128d(r, e);
57831 let r = _mm_mask_min_sd(a, 0b11111111, a, b);
57832 let e = _mm_set_pd(0., 1.);
57833 assert_eq_m128d(r, e);
57834 }
57835
57836 #[simd_test(enable = "avx512f")]
57837 unsafe fn test_mm_maskz_min_sd() {
57838 let a = _mm_set_pd(0., 1.);
57839 let b = _mm_set_pd(2., 3.);
57840 let r = _mm_maskz_min_sd(0, a, b);
57841 let e = _mm_set_pd(0., 0.);
57842 assert_eq_m128d(r, e);
57843 let r = _mm_maskz_min_sd(0b11111111, a, b);
57844 let e = _mm_set_pd(0., 1.);
57845 assert_eq_m128d(r, e);
57846 }
57847
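// For the scalar sqrt the operand is b[0]: sqrt(4.0) == 2.0, with the upper lanes taken from `a`.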
57848 #[simd_test(enable = "avx512f")]
57849 unsafe fn test_mm_mask_sqrt_ss() {
57850 let src = _mm_set_ps(10., 11., 100., 110.);
57851 let a = _mm_set_ps(1., 2., 10., 20.);
57852 let b = _mm_set_ps(3., 4., 30., 4.);
57853 let r = _mm_mask_sqrt_ss(src, 0, a, b);
57854 let e = _mm_set_ps(1., 2., 10., 110.);
57855 assert_eq_m128(r, e);
57856 let r = _mm_mask_sqrt_ss(src, 0b11111111, a, b);
57857 let e = _mm_set_ps(1., 2., 10., 2.);
57858 assert_eq_m128(r, e);
57859 }
57860
57861 #[simd_test(enable = "avx512f")]
57862 unsafe fn test_mm_maskz_sqrt_ss() {
57863 let a = _mm_set_ps(1., 2., 10., 20.);
57864 let b = _mm_set_ps(3., 4., 30., 4.);
57865 let r = _mm_maskz_sqrt_ss(0, a, b);
57866 let e = _mm_set_ps(1., 2., 10., 0.);
57867 assert_eq_m128(r, e);
57868 let r = _mm_maskz_sqrt_ss(0b11111111, a, b);
57869 let e = _mm_set_ps(1., 2., 10., 2.);
57870 assert_eq_m128(r, e);
57871 }
57872
57873 #[simd_test(enable = "avx512f")]
57874 unsafe fn test_mm_mask_sqrt_sd() {
57875 let src = _mm_set_pd(10., 11.);
57876 let a = _mm_set_pd(1., 2.);
57877 let b = _mm_set_pd(3., 4.);
57878 let r = _mm_mask_sqrt_sd(src, 0, a, b);
57879 let e = _mm_set_pd(1., 11.);
57880 assert_eq_m128d(r, e);
57881 let r = _mm_mask_sqrt_sd(src, 0b11111111, a, b);
57882 let e = _mm_set_pd(1., 2.);
57883 assert_eq_m128d(r, e);
57884 }
57885
57886 #[simd_test(enable = "avx512f")]
57887 unsafe fn test_mm_maskz_sqrt_sd() {
57888 let a = _mm_set_pd(1., 2.);
57889 let b = _mm_set_pd(3., 4.);
57890 let r = _mm_maskz_sqrt_sd(0, a, b);
57891 let e = _mm_set_pd(1., 0.);
57892 assert_eq_m128d(r, e);
57893 let r = _mm_maskz_sqrt_sd(0b11111111, a, b);
57894 let e = _mm_set_pd(1., 2.);
57895 assert_eq_m128d(r, e);
57896 }
57897
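// rsqrt14 approximates 1.0 / sqrt(b[0]) with a relative error of at most 2^-14; the input 4.0
// is chosen so the expected result 0.5 is exact and can be compared directly.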
57898 #[simd_test(enable = "avx512f")]
57899 unsafe fn test_mm_rsqrt14_ss() {
57900 let a = _mm_set_ps(1., 2., 10., 20.);
57901 let b = _mm_set_ps(3., 4., 30., 4.);
57902 let r = _mm_rsqrt14_ss(a, b);
57903 let e = _mm_set_ps(1., 2., 10., 0.5);
57904 assert_eq_m128(r, e);
57905 }
57906
57907 #[simd_test(enable = "avx512f")]
57908 unsafe fn test_mm_mask_rsqrt14_ss() {
57909 let src = _mm_set_ps(10., 11., 100., 110.);
57910 let a = _mm_set_ps(1., 2., 10., 20.);
57911 let b = _mm_set_ps(3., 4., 30., 4.);
57912 let r = _mm_mask_rsqrt14_ss(src, 0, a, b);
57913 let e = _mm_set_ps(1., 2., 10., 110.);
57914 assert_eq_m128(r, e);
57915 let r = _mm_mask_rsqrt14_ss(src, 0b11111111, a, b);
57916 let e = _mm_set_ps(1., 2., 10., 0.5);
57917 assert_eq_m128(r, e);
57918 }
57919
57920 #[simd_test(enable = "avx512f")]
57921 unsafe fn test_mm_maskz_rsqrt14_ss() {
57922 let a = _mm_set_ps(1., 2., 10., 20.);
57923 let b = _mm_set_ps(3., 4., 30., 4.);
57924 let r = _mm_maskz_rsqrt14_ss(0, a, b);
57925 let e = _mm_set_ps(1., 2., 10., 0.);
57926 assert_eq_m128(r, e);
57927 let r = _mm_maskz_rsqrt14_ss(0b11111111, a, b);
57928 let e = _mm_set_ps(1., 2., 10., 0.5);
57929 assert_eq_m128(r, e);
57930 }
57931
57932 #[simd_test(enable = "avx512f")]
57933 unsafe fn test_mm_rsqrt14_sd() {
57934 let a = _mm_set_pd(1., 2.);
57935 let b = _mm_set_pd(3., 4.);
57936 let r = _mm_rsqrt14_sd(a, b);
57937 let e = _mm_set_pd(1., 0.5);
57938 assert_eq_m128d(r, e);
57939 }
57940
57941 #[simd_test(enable = "avx512f")]
57942 unsafe fn test_mm_mask_rsqrt14_sd() {
57943 let src = _mm_set_pd(10., 11.);
57944 let a = _mm_set_pd(1., 2.);
57945 let b = _mm_set_pd(3., 4.);
57946 let r = _mm_mask_rsqrt14_sd(src, 0, a, b);
57947 let e = _mm_set_pd(1., 11.);
57948 assert_eq_m128d(r, e);
57949 let r = _mm_mask_rsqrt14_sd(src, 0b11111111, a, b);
57950 let e = _mm_set_pd(1., 0.5);
57951 assert_eq_m128d(r, e);
57952 }
57953
57954 #[simd_test(enable = "avx512f")]
57955 unsafe fn test_mm_maskz_rsqrt14_sd() {
57956 let a = _mm_set_pd(1., 2.);
57957 let b = _mm_set_pd(3., 4.);
57958 let r = _mm_maskz_rsqrt14_sd(0, a, b);
57959 let e = _mm_set_pd(1., 0.);
57960 assert_eq_m128d(r, e);
57961 let r = _mm_maskz_rsqrt14_sd(0b11111111, a, b);
57962 let e = _mm_set_pd(1., 0.5);
57963 assert_eq_m128d(r, e);
57964 }
57965
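// rcp14 approximates 1.0 / b[0] with a relative error of at most 2^-14; with b[0] == 4.0 the
// expected result 0.25 is exact.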
57966 #[simd_test(enable = "avx512f")]
57967 unsafe fn test_mm_rcp14_ss() {
57968 let a = _mm_set_ps(1., 2., 10., 20.);
57969 let b = _mm_set_ps(3., 4., 30., 4.);
57970 let r = _mm_rcp14_ss(a, b);
57971 let e = _mm_set_ps(1., 2., 10., 0.25);
57972 assert_eq_m128(r, e);
57973 }
57974
57975 #[simd_test(enable = "avx512f")]
57976 unsafe fn test_mm_mask_rcp14_ss() {
57977 let src = _mm_set_ps(10., 11., 100., 110.);
57978 let a = _mm_set_ps(1., 2., 10., 20.);
57979 let b = _mm_set_ps(3., 4., 30., 4.);
57980 let r = _mm_mask_rcp14_ss(src, 0, a, b);
57981 let e = _mm_set_ps(1., 2., 10., 110.);
57982 assert_eq_m128(r, e);
57983 let r = _mm_mask_rcp14_ss(src, 0b11111111, a, b);
57984 let e = _mm_set_ps(1., 2., 10., 0.25);
57985 assert_eq_m128(r, e);
57986 }
57987
57988 #[simd_test(enable = "avx512f")]
57989 unsafe fn test_mm_maskz_rcp14_ss() {
57990 let a = _mm_set_ps(1., 2., 10., 20.);
57991 let b = _mm_set_ps(3., 4., 30., 4.);
57992 let r = _mm_maskz_rcp14_ss(0, a, b);
57993 let e = _mm_set_ps(1., 2., 10., 0.);
57994 assert_eq_m128(r, e);
57995 let r = _mm_maskz_rcp14_ss(0b11111111, a, b);
57996 let e = _mm_set_ps(1., 2., 10., 0.25);
57997 assert_eq_m128(r, e);
57998 }
57999
58000 #[simd_test(enable = "avx512f")]
58001 unsafe fn test_mm_rcp14_sd() {
58002 let a = _mm_set_pd(1., 2.);
58003 let b = _mm_set_pd(3., 4.);
58004 let r = _mm_rcp14_sd(a, b);
58005 let e = _mm_set_pd(1., 0.25);
58006 assert_eq_m128d(r, e);
58007 }
58008
58009 #[simd_test(enable = "avx512f")]
58010 unsafe fn test_mm_mask_rcp14_sd() {
58011 let src = _mm_set_pd(10., 11.);
58012 let a = _mm_set_pd(1., 2.);
58013 let b = _mm_set_pd(3., 4.);
58014 let r = _mm_mask_rcp14_sd(src, 0, a, b);
58015 let e = _mm_set_pd(1., 11.);
58016 assert_eq_m128d(r, e);
58017 let r = _mm_mask_rcp14_sd(src, 0b11111111, a, b);
58018 let e = _mm_set_pd(1., 0.25);
58019 assert_eq_m128d(r, e);
58020 }
58021
58022 #[simd_test(enable = "avx512f")]
58023 unsafe fn test_mm_maskz_rcp14_sd() {
58024 let a = _mm_set_pd(1., 2.);
58025 let b = _mm_set_pd(3., 4.);
58026 let r = _mm_maskz_rcp14_sd(0, a, b);
58027 let e = _mm_set_pd(1., 0.);
58028 assert_eq_m128d(r, e);
58029 let r = _mm_maskz_rcp14_sd(0b11111111, a, b);
58030 let e = _mm_set_pd(1., 0.25);
58031 assert_eq_m128d(r, e);
58032 }
58033
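// getexp returns floor(log2(|b[0]|)) as a float in lane 0: floor(log2(3.0)) == 1.0.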
58034 #[simd_test(enable = "avx512f")]
58035 unsafe fn test_mm_getexp_ss() {
58036 let a = _mm_set1_ps(2.);
58037 let b = _mm_set1_ps(3.);
58038 let r = _mm_getexp_ss(a, b);
58039 let e = _mm_set_ps(2., 2., 2., 1.);
58040 assert_eq_m128(r, e);
58041 }
58042
58043 #[simd_test(enable = "avx512f")]
58044 unsafe fn test_mm_mask_getexp_ss() {
58045 let a = _mm_set1_ps(2.);
58046 let b = _mm_set1_ps(3.);
58047 let r = _mm_mask_getexp_ss(a, 0, a, b);
58048 let e = _mm_set_ps(2., 2., 2., 2.);
58049 assert_eq_m128(r, e);
58050 let r = _mm_mask_getexp_ss(a, 0b11111111, a, b);
58051 let e = _mm_set_ps(2., 2., 2., 1.);
58052 assert_eq_m128(r, e);
58053 }
58054
58055 #[simd_test(enable = "avx512f")]
58056 unsafe fn test_mm_maskz_getexp_ss() {
58057 let a = _mm_set1_ps(2.);
58058 let b = _mm_set1_ps(3.);
58059 let r = _mm_maskz_getexp_ss(0, a, b);
58060 let e = _mm_set_ps(2., 2., 2., 0.);
58061 assert_eq_m128(r, e);
58062 let r = _mm_maskz_getexp_ss(0b11111111, a, b);
58063 let e = _mm_set_ps(2., 2., 2., 1.);
58064 assert_eq_m128(r, e);
58065 }
58066
58067 #[simd_test(enable = "avx512f")]
58068 unsafe fn test_mm_getexp_sd() {
58069 let a = _mm_set1_pd(2.);
58070 let b = _mm_set1_pd(3.);
58071 let r = _mm_getexp_sd(a, b);
58072 let e = _mm_set_pd(2., 1.);
58073 assert_eq_m128d(r, e);
58074 }
58075
58076 #[simd_test(enable = "avx512f")]
58077 unsafe fn test_mm_mask_getexp_sd() {
58078 let a = _mm_set1_pd(2.);
58079 let b = _mm_set1_pd(3.);
58080 let r = _mm_mask_getexp_sd(a, 0, a, b);
58081 let e = _mm_set_pd(2., 2.);
58082 assert_eq_m128d(r, e);
58083 let r = _mm_mask_getexp_sd(a, 0b11111111, a, b);
58084 let e = _mm_set_pd(2., 1.);
58085 assert_eq_m128d(r, e);
58086 }
58087
58088 #[simd_test(enable = "avx512f")]
58089 unsafe fn test_mm_maskz_getexp_sd() {
58090 let a = _mm_set1_pd(2.);
58091 let b = _mm_set1_pd(3.);
58092 let r = _mm_maskz_getexp_sd(0, a, b);
58093 let e = _mm_set_pd(2., 0.);
58094 assert_eq_m128d(r, e);
58095 let r = _mm_maskz_getexp_sd(0b11111111, a, b);
58096 let e = _mm_set_pd(2., 1.);
58097 assert_eq_m128d(r, e);
58098 }
58099
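// getmant with _MM_MANT_NORM_1_2 normalizes b[0]'s mantissa into [1.0, 2.0):
// 10.0 == 1.25 * 2^3, so lane 0 becomes 1.25; _MM_MANT_SIGN_SRC keeps the source sign.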
58100 #[simd_test(enable = "avx512f")]
58101 unsafe fn test_mm_getmant_ss() {
58102 let a = _mm_set1_ps(20.);
58103 let b = _mm_set1_ps(10.);
58104 let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
58105 let e = _mm_set_ps(20., 20., 20., 1.25);
58106 assert_eq_m128(r, e);
58107 }
58108
58109 #[simd_test(enable = "avx512f")]
58110 unsafe fn test_mm_mask_getmant_ss() {
58111 let a = _mm_set1_ps(20.);
58112 let b = _mm_set1_ps(10.);
58113 let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
58114 let e = _mm_set_ps(20., 20., 20., 20.);
58115 assert_eq_m128(r, e);
58116 let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
58117 let e = _mm_set_ps(20., 20., 20., 1.25);
58118 assert_eq_m128(r, e);
58119 }
58120
58121 #[simd_test(enable = "avx512f")]
58122 unsafe fn test_mm_maskz_getmant_ss() {
58123 let a = _mm_set1_ps(20.);
58124 let b = _mm_set1_ps(10.);
58125 let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
58126 let e = _mm_set_ps(20., 20., 20., 0.);
58127 assert_eq_m128(r, e);
58128 let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
58129 let e = _mm_set_ps(20., 20., 20., 1.25);
58130 assert_eq_m128(r, e);
58131 }
58132
58133 #[simd_test(enable = "avx512f")]
58134 unsafe fn test_mm_getmant_sd() {
58135 let a = _mm_set1_pd(20.);
58136 let b = _mm_set1_pd(10.);
58137 let r = _mm_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
58138 let e = _mm_set_pd(20., 1.25);
58139 assert_eq_m128d(r, e);
58140 }
58141
58142 #[simd_test(enable = "avx512f")]
58143 unsafe fn test_mm_mask_getmant_sd() {
58144 let a = _mm_set1_pd(20.);
58145 let b = _mm_set1_pd(10.);
58146 let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
58147 let e = _mm_set_pd(20., 20.);
58148 assert_eq_m128d(r, e);
58149 let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
58150 let e = _mm_set_pd(20., 1.25);
58151 assert_eq_m128d(r, e);
58152 }
58153
58154 #[simd_test(enable = "avx512f")]
58155 unsafe fn test_mm_maskz_getmant_sd() {
58156 let a = _mm_set1_pd(20.);
58157 let b = _mm_set1_pd(10.);
58158 let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
58159 let e = _mm_set_pd(20., 0.);
58160 assert_eq_m128d(r, e);
58161 let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
58162 let e = _mm_set_pd(20., 1.25);
58163 assert_eq_m128d(r, e);
58164 }
58165
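// roundscale with IMM8 == 0 rounds b[0] to an integer (zero fraction bits, round-to-nearest):
// 1.1 becomes 1.0.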
58166 #[simd_test(enable = "avx512f")]
58167 unsafe fn test_mm_roundscale_ss() {
58168 let a = _mm_set1_ps(2.2);
58169 let b = _mm_set1_ps(1.1);
58170 let r = _mm_roundscale_ss::<0>(a, b);
58171 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58172 assert_eq_m128(r, e);
58173 }
58174
58175 #[simd_test(enable = "avx512f")]
58176 unsafe fn test_mm_mask_roundscale_ss() {
58177 let a = _mm_set1_ps(2.2);
58178 let b = _mm_set1_ps(1.1);
58179 let r = _mm_mask_roundscale_ss::<0>(a, 0, a, b);
58180 let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
58181 assert_eq_m128(r, e);
58182 let r = _mm_mask_roundscale_ss::<0>(a, 0b11111111, a, b);
58183 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58184 assert_eq_m128(r, e);
58185 }
58186
58187 #[simd_test(enable = "avx512f")]
58188 unsafe fn test_mm_maskz_roundscale_ss() {
58189 let a = _mm_set1_ps(2.2);
58190 let b = _mm_set1_ps(1.1);
58191 let r = _mm_maskz_roundscale_ss::<0>(0, a, b);
58192 let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
58193 assert_eq_m128(r, e);
58194 let r = _mm_maskz_roundscale_ss::<0>(0b11111111, a, b);
58195 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58196 assert_eq_m128(r, e);
58197 }
58198
58199 #[simd_test(enable = "avx512f")]
58200 unsafe fn test_mm_roundscale_sd() {
58201 let a = _mm_set1_pd(2.2);
58202 let b = _mm_set1_pd(1.1);
58203 let r = _mm_roundscale_sd::<0>(a, b);
58204 let e = _mm_set_pd(2.2, 1.0);
58205 assert_eq_m128d(r, e);
58206 }
58207
58208 #[simd_test(enable = "avx512f")]
58209 unsafe fn test_mm_mask_roundscale_sd() {
58210 let a = _mm_set1_pd(2.2);
58211 let b = _mm_set1_pd(1.1);
58212 let r = _mm_mask_roundscale_sd::<0>(a, 0, a, b);
58213 let e = _mm_set_pd(2.2, 2.2);
58214 assert_eq_m128d(r, e);
58215 let r = _mm_mask_roundscale_sd::<0>(a, 0b11111111, a, b);
58216 let e = _mm_set_pd(2.2, 1.0);
58217 assert_eq_m128d(r, e);
58218 }
58219
58220 #[simd_test(enable = "avx512f")]
58221 unsafe fn test_mm_maskz_roundscale_sd() {
58222 let a = _mm_set1_pd(2.2);
58223 let b = _mm_set1_pd(1.1);
58224 let r = _mm_maskz_roundscale_sd::<0>(0, a, b);
58225 let e = _mm_set_pd(2.2, 0.0);
58226 assert_eq_m128d(r, e);
58227 let r = _mm_maskz_roundscale_sd::<0>(0b11111111, a, b);
58228 let e = _mm_set_pd(2.2, 1.0);
58229 assert_eq_m128d(r, e);
58230 }
58231
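// scalef computes a[0] * 2^floor(b[0]): 1.0 * 2^3 == 8.0.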
58232 #[simd_test(enable = "avx512f")]
58233 unsafe fn test_mm_scalef_ss() {
58234 let a = _mm_set1_ps(1.);
58235 let b = _mm_set1_ps(3.);
58236 let r = _mm_scalef_ss(a, b);
58237 let e = _mm_set_ps(1., 1., 1., 8.);
58238 assert_eq_m128(r, e);
58239 }
58240
58241 #[simd_test(enable = "avx512f")]
58242 unsafe fn test_mm_mask_scalef_ss() {
58243 let a = _mm_set1_ps(1.);
58244 let b = _mm_set1_ps(3.);
58245 let r = _mm_mask_scalef_ss(a, 0, a, b);
58246 let e = _mm_set_ps(1., 1., 1., 1.);
58247 assert_eq_m128(r, e);
58248 let r = _mm_mask_scalef_ss(a, 0b11111111, a, b);
58249 let e = _mm_set_ps(1., 1., 1., 8.);
58250 assert_eq_m128(r, e);
58251 }
58252
58253 #[simd_test(enable = "avx512f")]
58254 unsafe fn test_mm_maskz_scalef_ss() {
58255 let a = _mm_set1_ps(1.);
58256 let b = _mm_set1_ps(3.);
58257 let r = _mm_maskz_scalef_ss(0, a, b);
58258 let e = _mm_set_ps(1., 1., 1., 0.);
58259 assert_eq_m128(r, e);
58260 let r = _mm_maskz_scalef_ss(0b11111111, a, b);
58261 let e = _mm_set_ps(1., 1., 1., 8.);
58262 assert_eq_m128(r, e);
58263 }
58264
58265 #[simd_test(enable = "avx512f")]
58266 unsafe fn test_mm_scalef_sd() {
58267 let a = _mm_set1_pd(1.);
58268 let b = _mm_set1_pd(3.);
58269 let r = _mm_scalef_sd(a, b);
58270 let e = _mm_set_pd(1., 8.);
58271 assert_eq_m128d(r, e);
58272 }
58273
58274 #[simd_test(enable = "avx512f")]
58275 unsafe fn test_mm_mask_scalef_sd() {
58276 let a = _mm_set1_pd(1.);
58277 let b = _mm_set1_pd(3.);
58278 let r = _mm_mask_scalef_sd(a, 0, a, b);
58279 let e = _mm_set_pd(1., 1.);
58280 assert_eq_m128d(r, e);
58281 let r = _mm_mask_scalef_sd(a, 0b11111111, a, b);
58282 let e = _mm_set_pd(1., 8.);
58283 assert_eq_m128d(r, e);
58284 }
58285
58286 #[simd_test(enable = "avx512f")]
58287 unsafe fn test_mm_maskz_scalef_sd() {
58288 let a = _mm_set1_pd(1.);
58289 let b = _mm_set1_pd(3.);
58290 let r = _mm_maskz_scalef_sd(0, a, b);
58291 let e = _mm_set_pd(1., 0.);
58292 assert_eq_m128d(r, e);
58293 let r = _mm_maskz_scalef_sd(0b11111111, a, b);
58294 let e = _mm_set_pd(1., 8.);
58295 assert_eq_m128d(r, e);
58296 }
58297
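// Scalar FMA family on lane 0 with a = 1, b = 2, c = 3:
// fmadd = a*b + c = 5, fmsub = a*b - c = -1, fnmadd = -(a*b) + c = 1, fnmsub = -(a*b) - c = -5.
// When the mask bit is clear, lane 0 falls back to a[0] (mask), 0.0 (maskz) or c[0] (mask3);
// the upper lanes come from `a` for mask/maskz and from `c` for mask3.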
58298 #[simd_test(enable = "avx512f")]
58299 unsafe fn test_mm_mask_fmadd_ss() {
58300 let a = _mm_set1_ps(1.);
58301 let b = _mm_set1_ps(2.);
58302 let c = _mm_set1_ps(3.);
58303 let r = _mm_mask_fmadd_ss(a, 0, b, c);
58304 assert_eq_m128(r, a);
58305 let r = _mm_mask_fmadd_ss(a, 0b11111111, b, c);
58306 let e = _mm_set_ps(1., 1., 1., 5.);
58307 assert_eq_m128(r, e);
58308 }
58309
58310 #[simd_test(enable = "avx512f")]
58311 unsafe fn test_mm_maskz_fmadd_ss() {
58312 let a = _mm_set1_ps(1.);
58313 let b = _mm_set1_ps(2.);
58314 let c = _mm_set1_ps(3.);
58315 let r = _mm_maskz_fmadd_ss(0, a, b, c);
58316 let e = _mm_set_ps(1., 1., 1., 0.);
58317 assert_eq_m128(r, e);
58318 let r = _mm_maskz_fmadd_ss(0b11111111, a, b, c);
58319 let e = _mm_set_ps(1., 1., 1., 5.);
58320 assert_eq_m128(r, e);
58321 }
58322
58323 #[simd_test(enable = "avx512f")]
58324 unsafe fn test_mm_mask3_fmadd_ss() {
58325 let a = _mm_set1_ps(1.);
58326 let b = _mm_set1_ps(2.);
58327 let c = _mm_set1_ps(3.);
58328 let r = _mm_mask3_fmadd_ss(a, b, c, 0);
58329 assert_eq_m128(r, c);
58330 let r = _mm_mask3_fmadd_ss(a, b, c, 0b11111111);
58331 let e = _mm_set_ps(3., 3., 3., 5.);
58332 assert_eq_m128(r, e);
58333 }
58334
58335 #[simd_test(enable = "avx512f")]
58336 unsafe fn test_mm_mask_fmadd_sd() {
58337 let a = _mm_set1_pd(1.);
58338 let b = _mm_set1_pd(2.);
58339 let c = _mm_set1_pd(3.);
58340 let r = _mm_mask_fmadd_sd(a, 0, b, c);
58341 assert_eq_m128d(r, a);
58342 let r = _mm_mask_fmadd_sd(a, 0b11111111, b, c);
58343 let e = _mm_set_pd(1., 5.);
58344 assert_eq_m128d(r, e);
58345 }
58346
58347 #[simd_test(enable = "avx512f")]
58348 unsafe fn test_mm_maskz_fmadd_sd() {
58349 let a = _mm_set1_pd(1.);
58350 let b = _mm_set1_pd(2.);
58351 let c = _mm_set1_pd(3.);
58352 let r = _mm_maskz_fmadd_sd(0, a, b, c);
58353 let e = _mm_set_pd(1., 0.);
58354 assert_eq_m128d(r, e);
58355 let r = _mm_maskz_fmadd_sd(0b11111111, a, b, c);
58356 let e = _mm_set_pd(1., 5.);
58357 assert_eq_m128d(r, e);
58358 }
58359
58360 #[simd_test(enable = "avx512f")]
58361 unsafe fn test_mm_mask3_fmadd_sd() {
58362 let a = _mm_set1_pd(1.);
58363 let b = _mm_set1_pd(2.);
58364 let c = _mm_set1_pd(3.);
58365 let r = _mm_mask3_fmadd_sd(a, b, c, 0);
58366 assert_eq_m128d(r, c);
58367 let r = _mm_mask3_fmadd_sd(a, b, c, 0b11111111);
58368 let e = _mm_set_pd(3., 5.);
58369 assert_eq_m128d(r, e);
58370 }
58371
58372 #[simd_test(enable = "avx512f")]
58373 unsafe fn test_mm_mask_fmsub_ss() {
58374 let a = _mm_set1_ps(1.);
58375 let b = _mm_set1_ps(2.);
58376 let c = _mm_set1_ps(3.);
58377 let r = _mm_mask_fmsub_ss(a, 0, b, c);
58378 assert_eq_m128(r, a);
58379 let r = _mm_mask_fmsub_ss(a, 0b11111111, b, c);
58380 let e = _mm_set_ps(1., 1., 1., -1.);
58381 assert_eq_m128(r, e);
58382 }
58383
58384 #[simd_test(enable = "avx512f")]
58385 unsafe fn test_mm_maskz_fmsub_ss() {
58386 let a = _mm_set1_ps(1.);
58387 let b = _mm_set1_ps(2.);
58388 let c = _mm_set1_ps(3.);
58389 let r = _mm_maskz_fmsub_ss(0, a, b, c);
58390 let e = _mm_set_ps(1., 1., 1., 0.);
58391 assert_eq_m128(r, e);
58392 let r = _mm_maskz_fmsub_ss(0b11111111, a, b, c);
58393 let e = _mm_set_ps(1., 1., 1., -1.);
58394 assert_eq_m128(r, e);
58395 }
58396
58397 #[simd_test(enable = "avx512f")]
58398 unsafe fn test_mm_mask3_fmsub_ss() {
58399 let a = _mm_set1_ps(1.);
58400 let b = _mm_set1_ps(2.);
58401 let c = _mm_set1_ps(3.);
58402 let r = _mm_mask3_fmsub_ss(a, b, c, 0);
58403 assert_eq_m128(r, c);
58404 let r = _mm_mask3_fmsub_ss(a, b, c, 0b11111111);
58405 let e = _mm_set_ps(3., 3., 3., -1.);
58406 assert_eq_m128(r, e);
58407 }
58408
58409 #[simd_test(enable = "avx512f")]
58410 unsafe fn test_mm_mask_fmsub_sd() {
58411 let a = _mm_set1_pd(1.);
58412 let b = _mm_set1_pd(2.);
58413 let c = _mm_set1_pd(3.);
58414 let r = _mm_mask_fmsub_sd(a, 0, b, c);
58415 assert_eq_m128d(r, a);
58416 let r = _mm_mask_fmsub_sd(a, 0b11111111, b, c);
58417 let e = _mm_set_pd(1., -1.);
58418 assert_eq_m128d(r, e);
58419 }
58420
58421 #[simd_test(enable = "avx512f")]
58422 unsafe fn test_mm_maskz_fmsub_sd() {
58423 let a = _mm_set1_pd(1.);
58424 let b = _mm_set1_pd(2.);
58425 let c = _mm_set1_pd(3.);
58426 let r = _mm_maskz_fmsub_sd(0, a, b, c);
58427 let e = _mm_set_pd(1., 0.);
58428 assert_eq_m128d(r, e);
58429 let r = _mm_maskz_fmsub_sd(0b11111111, a, b, c);
58430 let e = _mm_set_pd(1., -1.);
58431 assert_eq_m128d(r, e);
58432 }
58433
58434 #[simd_test(enable = "avx512f")]
58435 unsafe fn test_mm_mask3_fmsub_sd() {
58436 let a = _mm_set1_pd(1.);
58437 let b = _mm_set1_pd(2.);
58438 let c = _mm_set1_pd(3.);
58439 let r = _mm_mask3_fmsub_sd(a, b, c, 0);
58440 assert_eq_m128d(r, c);
58441 let r = _mm_mask3_fmsub_sd(a, b, c, 0b11111111);
58442 let e = _mm_set_pd(3., -1.);
58443 assert_eq_m128d(r, e);
58444 }
58445
58446 #[simd_test(enable = "avx512f")]
58447 unsafe fn test_mm_mask_fnmadd_ss() {
58448 let a = _mm_set1_ps(1.);
58449 let b = _mm_set1_ps(2.);
58450 let c = _mm_set1_ps(3.);
58451 let r = _mm_mask_fnmadd_ss(a, 0, b, c);
58452 assert_eq_m128(r, a);
58453 let r = _mm_mask_fnmadd_ss(a, 0b11111111, b, c);
58454 let e = _mm_set_ps(1., 1., 1., 1.);
58455 assert_eq_m128(r, e);
58456 }
58457
58458 #[simd_test(enable = "avx512f")]
58459 unsafe fn test_mm_maskz_fnmadd_ss() {
58460 let a = _mm_set1_ps(1.);
58461 let b = _mm_set1_ps(2.);
58462 let c = _mm_set1_ps(3.);
58463 let r = _mm_maskz_fnmadd_ss(0, a, b, c);
58464 let e = _mm_set_ps(1., 1., 1., 0.);
58465 assert_eq_m128(r, e);
58466 let r = _mm_maskz_fnmadd_ss(0b11111111, a, b, c);
58467 let e = _mm_set_ps(1., 1., 1., 1.);
58468 assert_eq_m128(r, e);
58469 }
58470
58471 #[simd_test(enable = "avx512f")]
58472 unsafe fn test_mm_mask3_fnmadd_ss() {
58473 let a = _mm_set1_ps(1.);
58474 let b = _mm_set1_ps(2.);
58475 let c = _mm_set1_ps(3.);
58476 let r = _mm_mask3_fnmadd_ss(a, b, c, 0);
58477 assert_eq_m128(r, c);
58478 let r = _mm_mask3_fnmadd_ss(a, b, c, 0b11111111);
58479 let e = _mm_set_ps(3., 3., 3., 1.);
58480 assert_eq_m128(r, e);
58481 }
58482
58483 #[simd_test(enable = "avx512f")]
58484 unsafe fn test_mm_mask_fnmadd_sd() {
58485 let a = _mm_set1_pd(1.);
58486 let b = _mm_set1_pd(2.);
58487 let c = _mm_set1_pd(3.);
58488 let r = _mm_mask_fnmadd_sd(a, 0, b, c);
58489 assert_eq_m128d(r, a);
58490 let r = _mm_mask_fnmadd_sd(a, 0b11111111, b, c);
58491 let e = _mm_set_pd(1., 1.);
58492 assert_eq_m128d(r, e);
58493 }
58494
58495 #[simd_test(enable = "avx512f")]
58496 unsafe fn test_mm_maskz_fnmadd_sd() {
58497 let a = _mm_set1_pd(1.);
58498 let b = _mm_set1_pd(2.);
58499 let c = _mm_set1_pd(3.);
58500 let r = _mm_maskz_fnmadd_sd(0, a, b, c);
58501 let e = _mm_set_pd(1., 0.);
58502 assert_eq_m128d(r, e);
58503 let r = _mm_maskz_fnmadd_sd(0b11111111, a, b, c);
58504 let e = _mm_set_pd(1., 1.);
58505 assert_eq_m128d(r, e);
58506 }
58507
58508 #[simd_test(enable = "avx512f")]
58509 unsafe fn test_mm_mask3_fnmadd_sd() {
58510 let a = _mm_set1_pd(1.);
58511 let b = _mm_set1_pd(2.);
58512 let c = _mm_set1_pd(3.);
58513 let r = _mm_mask3_fnmadd_sd(a, b, c, 0);
58514 assert_eq_m128d(r, c);
58515 let r = _mm_mask3_fnmadd_sd(a, b, c, 0b11111111);
58516 let e = _mm_set_pd(3., 1.);
58517 assert_eq_m128d(r, e);
58518 }
58519
58520 #[simd_test(enable = "avx512f")]
58521 unsafe fn test_mm_mask_fnmsub_ss() {
58522 let a = _mm_set1_ps(1.);
58523 let b = _mm_set1_ps(2.);
58524 let c = _mm_set1_ps(3.);
58525 let r = _mm_mask_fnmsub_ss(a, 0, b, c);
58526 assert_eq_m128(r, a);
58527 let r = _mm_mask_fnmsub_ss(a, 0b11111111, b, c);
58528 let e = _mm_set_ps(1., 1., 1., -5.);
58529 assert_eq_m128(r, e);
58530 }
58531
58532 #[simd_test(enable = "avx512f")]
58533 unsafe fn test_mm_maskz_fnmsub_ss() {
58534 let a = _mm_set1_ps(1.);
58535 let b = _mm_set1_ps(2.);
58536 let c = _mm_set1_ps(3.);
58537 let r = _mm_maskz_fnmsub_ss(0, a, b, c);
58538 let e = _mm_set_ps(1., 1., 1., 0.);
58539 assert_eq_m128(r, e);
58540 let r = _mm_maskz_fnmsub_ss(0b11111111, a, b, c);
58541 let e = _mm_set_ps(1., 1., 1., -5.);
58542 assert_eq_m128(r, e);
58543 }
58544
58545 #[simd_test(enable = "avx512f")]
58546 unsafe fn test_mm_mask3_fnmsub_ss() {
58547 let a = _mm_set1_ps(1.);
58548 let b = _mm_set1_ps(2.);
58549 let c = _mm_set1_ps(3.);
58550 let r = _mm_mask3_fnmsub_ss(a, b, c, 0);
58551 assert_eq_m128(r, c);
58552 let r = _mm_mask3_fnmsub_ss(a, b, c, 0b11111111);
58553 let e = _mm_set_ps(3., 3., 3., -5.);
58554 assert_eq_m128(r, e);
58555 }
58556
58557 #[simd_test(enable = "avx512f")]
58558 unsafe fn test_mm_mask_fnmsub_sd() {
58559 let a = _mm_set1_pd(1.);
58560 let b = _mm_set1_pd(2.);
58561 let c = _mm_set1_pd(3.);
58562 let r = _mm_mask_fnmsub_sd(a, 0, b, c);
58563 assert_eq_m128d(r, a);
58564 let r = _mm_mask_fnmsub_sd(a, 0b11111111, b, c);
58565 let e = _mm_set_pd(1., -5.);
58566 assert_eq_m128d(r, e);
58567 }
58568
58569 #[simd_test(enable = "avx512f")]
58570 unsafe fn test_mm_maskz_fnmsub_sd() {
58571 let a = _mm_set1_pd(1.);
58572 let b = _mm_set1_pd(2.);
58573 let c = _mm_set1_pd(3.);
58574 let r = _mm_maskz_fnmsub_sd(0, a, b, c);
58575 let e = _mm_set_pd(1., 0.);
58576 assert_eq_m128d(r, e);
58577 let r = _mm_maskz_fnmsub_sd(0b11111111, a, b, c);
58578 let e = _mm_set_pd(1., -5.);
58579 assert_eq_m128d(r, e);
58580 }
58581
58582 #[simd_test(enable = "avx512f")]
58583 unsafe fn test_mm_mask3_fnmsub_sd() {
58584 let a = _mm_set1_pd(1.);
58585 let b = _mm_set1_pd(2.);
58586 let c = _mm_set1_pd(3.);
58587 let r = _mm_mask3_fnmsub_sd(a, b, c, 0);
58588 assert_eq_m128d(r, c);
58589 let r = _mm_mask3_fnmsub_sd(a, b, c, 0b11111111);
58590 let e = _mm_set_pd(3., -5.);
58591 assert_eq_m128d(r, e);
58592 }
58593
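// The *_round_* variants take the rounding mode as a const generic.
// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC truncates with exceptions suppressed; every result in
// these tests is exactly representable, so the chosen mode does not change the expected values.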
58594 #[simd_test(enable = "avx512f")]
58595 unsafe fn test_mm_add_round_ss() {
58596 let a = _mm_set_ps(1., 2., 10., 20.);
58597 let b = _mm_set_ps(3., 4., 30., 40.);
58598 let r = _mm_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58599 let e = _mm_set_ps(1., 2., 10., 60.);
58600 assert_eq_m128(r, e);
58601 }
58602
58603 #[simd_test(enable = "avx512f")]
58604 unsafe fn test_mm_mask_add_round_ss() {
58605 let src = _mm_set_ps(10., 11., 100., 110.);
58606 let a = _mm_set_ps(1., 2., 10., 20.);
58607 let b = _mm_set_ps(3., 4., 30., 40.);
58608 let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58609 let e = _mm_set_ps(1., 2., 10., 110.);
58610 assert_eq_m128(r, e);
58611 let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58612 src, 0b11111111, a, b,
58613 );
58614 let e = _mm_set_ps(1., 2., 10., 60.);
58615 assert_eq_m128(r, e);
58616 }
58617
58618 #[simd_test(enable = "avx512f")]
58619 unsafe fn test_mm_maskz_add_round_ss() {
58620 let a = _mm_set_ps(1., 2., 10., 20.);
58621 let b = _mm_set_ps(3., 4., 30., 40.);
58622 let r = _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58623 let e = _mm_set_ps(1., 2., 10., 0.);
58624 assert_eq_m128(r, e);
58625 let r =
58626 _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58627 let e = _mm_set_ps(1., 2., 10., 60.);
58628 assert_eq_m128(r, e);
58629 }
58630
58631 #[simd_test(enable = "avx512f")]
58632 unsafe fn test_mm_add_round_sd() {
58633 let a = _mm_set_pd(1., 2.);
58634 let b = _mm_set_pd(3., 4.);
58635 let r = _mm_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58636 let e = _mm_set_pd(1., 6.);
58637 assert_eq_m128d(r, e);
58638 }
58639
58640 #[simd_test(enable = "avx512f")]
58641 unsafe fn test_mm_mask_add_round_sd() {
58642 let src = _mm_set_pd(10., 11.);
58643 let a = _mm_set_pd(1., 2.);
58644 let b = _mm_set_pd(3., 4.);
58645 let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58646 let e = _mm_set_pd(1., 11.);
58647 assert_eq_m128d(r, e);
58648 let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58649 src, 0b11111111, a, b,
58650 );
58651 let e = _mm_set_pd(1., 6.);
58652 assert_eq_m128d(r, e);
58653 }
58654
58655 #[simd_test(enable = "avx512f")]
58656 unsafe fn test_mm_maskz_add_round_sd() {
58657 let a = _mm_set_pd(1., 2.);
58658 let b = _mm_set_pd(3., 4.);
58659 let r = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58660 let e = _mm_set_pd(1., 0.);
58661 assert_eq_m128d(r, e);
58662 let r =
58663 _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58664 let e = _mm_set_pd(1., 6.);
58665 assert_eq_m128d(r, e);
58666 }
58667
58668 #[simd_test(enable = "avx512f")]
58669 unsafe fn test_mm_sub_round_ss() {
58670 let a = _mm_set_ps(1., 2., 10., 20.);
58671 let b = _mm_set_ps(3., 4., 30., 40.);
58672 let r = _mm_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58673 let e = _mm_set_ps(1., 2., 10., -20.);
58674 assert_eq_m128(r, e);
58675 }
58676
58677 #[simd_test(enable = "avx512f")]
58678 unsafe fn test_mm_mask_sub_round_ss() {
58679 let src = _mm_set_ps(10., 11., 100., 110.);
58680 let a = _mm_set_ps(1., 2., 10., 20.);
58681 let b = _mm_set_ps(3., 4., 30., 40.);
58682 let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58683 let e = _mm_set_ps(1., 2., 10., 110.);
58684 assert_eq_m128(r, e);
58685 let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58686 src, 0b11111111, a, b,
58687 );
58688 let e = _mm_set_ps(1., 2., 10., -20.);
58689 assert_eq_m128(r, e);
58690 }
58691
58692 #[simd_test(enable = "avx512f")]
58693 unsafe fn test_mm_maskz_sub_round_ss() {
58694 let a = _mm_set_ps(1., 2., 10., 20.);
58695 let b = _mm_set_ps(3., 4., 30., 40.);
58696 let r = _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58697 let e = _mm_set_ps(1., 2., 10., 0.);
58698 assert_eq_m128(r, e);
58699 let r =
58700 _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58701 let e = _mm_set_ps(1., 2., 10., -20.);
58702 assert_eq_m128(r, e);
58703 }
58704
58705 #[simd_test(enable = "avx512f")]
58706 unsafe fn test_mm_sub_round_sd() {
58707 let a = _mm_set_pd(1., 2.);
58708 let b = _mm_set_pd(3., 4.);
58709 let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58710 let e = _mm_set_pd(1., -2.);
58711 assert_eq_m128d(r, e);
58712 }
58713
58714 #[simd_test(enable = "avx512f")]
58715 unsafe fn test_mm_mask_sub_round_sd() {
58716 let src = _mm_set_pd(10., 11.);
58717 let a = _mm_set_pd(1., 2.);
58718 let b = _mm_set_pd(3., 4.);
58719 let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58720 let e = _mm_set_pd(1., 11.);
58721 assert_eq_m128d(r, e);
58722 let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58723 src, 0b11111111, a, b,
58724 );
58725 let e = _mm_set_pd(1., -2.);
58726 assert_eq_m128d(r, e);
58727 }
58728
58729 #[simd_test(enable = "avx512f")]
58730 unsafe fn test_mm_maskz_sub_round_sd() {
58731 let a = _mm_set_pd(1., 2.);
58732 let b = _mm_set_pd(3., 4.);
58733 let r = _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58734 let e = _mm_set_pd(1., 0.);
58735 assert_eq_m128d(r, e);
58736 let r =
58737 _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58738 let e = _mm_set_pd(1., -2.);
58739 assert_eq_m128d(r, e);
58740 }
58741
58742 #[simd_test(enable = "avx512f")]
58743 unsafe fn test_mm_mul_round_ss() {
58744 let a = _mm_set_ps(1., 2., 10., 20.);
58745 let b = _mm_set_ps(3., 4., 30., 40.);
58746 let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58747 let e = _mm_set_ps(1., 2., 10., 800.);
58748 assert_eq_m128(r, e);
58749 }
58750
58751 #[simd_test(enable = "avx512f")]
58752 unsafe fn test_mm_mask_mul_round_ss() {
58753 let src = _mm_set_ps(10., 11., 100., 110.);
58754 let a = _mm_set_ps(1., 2., 10., 20.);
58755 let b = _mm_set_ps(3., 4., 30., 40.);
58756 let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58757 let e = _mm_set_ps(1., 2., 10., 110.);
58758 assert_eq_m128(r, e);
58759 let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58760 src, 0b11111111, a, b,
58761 );
58762 let e = _mm_set_ps(1., 2., 10., 800.);
58763 assert_eq_m128(r, e);
58764 }
58765
58766 #[simd_test(enable = "avx512f")]
58767 unsafe fn test_mm_maskz_mul_round_ss() {
58768 let a = _mm_set_ps(1., 2., 10., 20.);
58769 let b = _mm_set_ps(3., 4., 30., 40.);
58770 let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58771 let e = _mm_set_ps(1., 2., 10., 0.);
58772 assert_eq_m128(r, e);
58773 let r =
58774 _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58775 let e = _mm_set_ps(1., 2., 10., 800.);
58776 assert_eq_m128(r, e);
58777 }
58778
58779 #[simd_test(enable = "avx512f")]
58780 unsafe fn test_mm_mul_round_sd() {
58781 let a = _mm_set_pd(1., 2.);
58782 let b = _mm_set_pd(3., 4.);
58783 let r = _mm_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58784 let e = _mm_set_pd(1., 8.);
58785 assert_eq_m128d(r, e);
58786 }
58787
58788 #[simd_test(enable = "avx512f")]
58789 unsafe fn test_mm_mask_mul_round_sd() {
58790 let src = _mm_set_pd(10., 11.);
58791 let a = _mm_set_pd(1., 2.);
58792 let b = _mm_set_pd(3., 4.);
58793 let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58794 let e = _mm_set_pd(1., 11.);
58795 assert_eq_m128d(r, e);
58796 let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58797 src, 0b11111111, a, b,
58798 );
58799 let e = _mm_set_pd(1., 8.);
58800 assert_eq_m128d(r, e);
58801 }
58802
58803 #[simd_test(enable = "avx512f")]
58804 unsafe fn test_mm_maskz_mul_round_sd() {
58805 let a = _mm_set_pd(1., 2.);
58806 let b = _mm_set_pd(3., 4.);
58807 let r = _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58808 let e = _mm_set_pd(1., 0.);
58809 assert_eq_m128d(r, e);
58810 let r =
58811 _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58812 let e = _mm_set_pd(1., 8.);
58813 assert_eq_m128d(r, e);
58814 }
58815
58816 #[simd_test(enable = "avx512f")]
58817 unsafe fn test_mm_div_round_ss() {
58818 let a = _mm_set_ps(1., 2., 10., 20.);
58819 let b = _mm_set_ps(3., 4., 30., 40.);
58820 let r = _mm_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58821 let e = _mm_set_ps(1., 2., 10., 0.5);
58822 assert_eq_m128(r, e);
58823 }
58824
58825 #[simd_test(enable = "avx512f")]
58826 unsafe fn test_mm_mask_div_round_ss() {
58827 let src = _mm_set_ps(10., 11., 100., 110.);
58828 let a = _mm_set_ps(1., 2., 10., 20.);
58829 let b = _mm_set_ps(3., 4., 30., 40.);
58830 let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58831 let e = _mm_set_ps(1., 2., 10., 110.);
58832 assert_eq_m128(r, e);
58833 let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58834 src, 0b11111111, a, b,
58835 );
58836 let e = _mm_set_ps(1., 2., 10., 0.5);
58837 assert_eq_m128(r, e);
58838 }
58839
58840 #[simd_test(enable = "avx512f")]
58841 unsafe fn test_mm_maskz_div_round_ss() {
58842 let a = _mm_set_ps(1., 2., 10., 20.);
58843 let b = _mm_set_ps(3., 4., 30., 40.);
58844 let r = _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58845 let e = _mm_set_ps(1., 2., 10., 0.);
58846 assert_eq_m128(r, e);
58847 let r =
58848 _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58849 let e = _mm_set_ps(1., 2., 10., 0.5);
58850 assert_eq_m128(r, e);
58851 }
58852
58853 #[simd_test(enable = "avx512f")]
58854 unsafe fn test_mm_div_round_sd() {
58855 let a = _mm_set_pd(1., 2.);
58856 let b = _mm_set_pd(3., 4.);
58857 let r = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58858 let e = _mm_set_pd(1., 0.5);
58859 assert_eq_m128d(r, e);
58860 }
58861
58862 #[simd_test(enable = "avx512f")]
58863 unsafe fn test_mm_mask_div_round_sd() {
58864 let src = _mm_set_pd(10., 11.);
58865 let a = _mm_set_pd(1., 2.);
58866 let b = _mm_set_pd(3., 4.);
58867 let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58868 let e = _mm_set_pd(1., 11.);
58869 assert_eq_m128d(r, e);
58870 let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58871 src, 0b11111111, a, b,
58872 );
58873 let e = _mm_set_pd(1., 0.5);
58874 assert_eq_m128d(r, e);
58875 }
58876
58877 #[simd_test(enable = "avx512f")]
58878 unsafe fn test_mm_maskz_div_round_sd() {
58879 let a = _mm_set_pd(1., 2.);
58880 let b = _mm_set_pd(3., 4.);
58881 let r = _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58882 let e = _mm_set_pd(1., 0.);
58883 assert_eq_m128d(r, e);
58884 let r =
58885 _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58886 let e = _mm_set_pd(1., 0.5);
58887 assert_eq_m128d(r, e);
58888 }
58889
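// For max/min the const parameter only controls exception suppression (SAE):
// _MM_FROUND_CUR_DIRECTION leaves MXCSR in charge, and the numeric result is unaffected.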
58890 #[simd_test(enable = "avx512f")]
58891 unsafe fn test_mm_max_round_ss() {
58892 let a = _mm_set_ps(0., 1., 2., 3.);
58893 let b = _mm_set_ps(4., 5., 6., 7.);
58894 let r = _mm_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
58895 let e = _mm_set_ps(0., 1., 2., 7.);
58896 assert_eq_m128(r, e);
58897 }
58898
58899 #[simd_test(enable = "avx512f")]
58900 unsafe fn test_mm_mask_max_round_ss() {
58901 let a = _mm_set_ps(0., 1., 2., 3.);
58902 let b = _mm_set_ps(4., 5., 6., 7.);
58903 let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58904 let e = _mm_set_ps(0., 1., 2., 3.);
58905 assert_eq_m128(r, e);
58906 let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58907 let e = _mm_set_ps(0., 1., 2., 7.);
58908 assert_eq_m128(r, e);
58909 }
58910
58911 #[simd_test(enable = "avx512f")]
58912 unsafe fn test_mm_maskz_max_round_ss() {
58913 let a = _mm_set_ps(0., 1., 2., 3.);
58914 let b = _mm_set_ps(4., 5., 6., 7.);
58915 let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58916 let e = _mm_set_ps(0., 1., 2., 0.);
58917 assert_eq_m128(r, e);
58918 let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58919 let e = _mm_set_ps(0., 1., 2., 7.);
58920 assert_eq_m128(r, e);
58921 }
58922
58923 #[simd_test(enable = "avx512f")]
58924 unsafe fn test_mm_max_round_sd() {
58925 let a = _mm_set_pd(0., 1.);
58926 let b = _mm_set_pd(2., 3.);
58927 let r = _mm_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
58928 let e = _mm_set_pd(0., 3.);
58929 assert_eq_m128d(r, e);
58930 }
58931
58932 #[simd_test(enable = "avx512f")]
58933 unsafe fn test_mm_mask_max_round_sd() {
58934 let a = _mm_set_pd(0., 1.);
58935 let b = _mm_set_pd(2., 3.);
58936 let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58937 let e = _mm_set_pd(0., 1.);
58938 assert_eq_m128d(r, e);
58939 let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58940 let e = _mm_set_pd(0., 3.);
58941 assert_eq_m128d(r, e);
58942 }
58943
58944 #[simd_test(enable = "avx512f")]
58945 unsafe fn test_mm_maskz_max_round_sd() {
58946 let a = _mm_set_pd(0., 1.);
58947 let b = _mm_set_pd(2., 3.);
58948 let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58949 let e = _mm_set_pd(0., 0.);
58950 assert_eq_m128d(r, e);
58951 let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58952 let e = _mm_set_pd(0., 3.);
58953 assert_eq_m128d(r, e);
58954 }
58955
58956 #[simd_test(enable = "avx512f")]
58957 unsafe fn test_mm_min_round_ss() {
58958 let a = _mm_set_ps(0., 1., 2., 3.);
58959 let b = _mm_set_ps(4., 5., 6., 7.);
58960 let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
58961 let e = _mm_set_ps(0., 1., 2., 3.);
58962 assert_eq_m128(r, e);
58963 }
58964
58965 #[simd_test(enable = "avx512f")]
58966 unsafe fn test_mm_mask_min_round_ss() {
58967 let a = _mm_set_ps(0., 1., 2., 3.);
58968 let b = _mm_set_ps(4., 5., 6., 7.);
58969 let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58970 let e = _mm_set_ps(0., 1., 2., 3.);
58971 assert_eq_m128(r, e);
58972 let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58973 let e = _mm_set_ps(0., 1., 2., 3.);
58974 assert_eq_m128(r, e);
58975 }
58976
58977 #[simd_test(enable = "avx512f")]
58978 unsafe fn test_mm_maskz_min_round_ss() {
58979 let a = _mm_set_ps(0., 1., 2., 3.);
58980 let b = _mm_set_ps(4., 5., 6., 7.);
58981 let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58982 let e = _mm_set_ps(0., 1., 2., 0.);
58983 assert_eq_m128(r, e);
58984 let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58985 let e = _mm_set_ps(0., 1., 2., 3.);
58986 assert_eq_m128(r, e);
58987 }
58988
58989 #[simd_test(enable = "avx512f")]
58990 unsafe fn test_mm_min_round_sd() {
58991 let a = _mm_set_pd(0., 1.);
58992 let b = _mm_set_pd(2., 3.);
58993 let r = _mm_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
58994 let e = _mm_set_pd(0., 1.);
58995 assert_eq_m128d(r, e);
58996 }
58997
58998 #[simd_test(enable = "avx512f")]
58999 unsafe fn test_mm_mask_min_round_sd() {
59000 let a = _mm_set_pd(0., 1.);
59001 let b = _mm_set_pd(2., 3.);
59002 let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59003 let e = _mm_set_pd(0., 1.);
59004 assert_eq_m128d(r, e);
59005 let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59006 let e = _mm_set_pd(0., 1.);
59007 assert_eq_m128d(r, e);
59008 }
59009
59010 #[simd_test(enable = "avx512f")]
59011 unsafe fn test_mm_maskz_min_round_sd() {
59012 let a = _mm_set_pd(0., 1.);
59013 let b = _mm_set_pd(2., 3.);
59014 let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
59015 let e = _mm_set_pd(0., 0.);
59016 assert_eq_m128d(r, e);
59017 let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59018 let e = _mm_set_pd(0., 1.);
59019 assert_eq_m128d(r, e);
59020 }
59021
59022 #[simd_test(enable = "avx512f")]
59023 unsafe fn test_mm_sqrt_round_ss() {
59024 let a = _mm_set_ps(1., 2., 10., 20.);
59025 let b = _mm_set_ps(3., 4., 30., 4.);
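        // Only the low lane is computed: sqrt(4.) = 2.; the upper three lanes are copied from `a`.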
59026 let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
59027 let e = _mm_set_ps(1., 2., 10., 2.);
59028 assert_eq_m128(r, e);
59029 }
59030
59031 #[simd_test(enable = "avx512f")]
59032 unsafe fn test_mm_mask_sqrt_round_ss() {
59033 let src = _mm_set_ps(10., 11., 100., 110.);
59034 let a = _mm_set_ps(1., 2., 10., 20.);
59035 let b = _mm_set_ps(3., 4., 30., 4.);
59036 let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
59037 let e = _mm_set_ps(1., 2., 10., 110.);
59038 assert_eq_m128(r, e);
59039 let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
59040 src, 0b11111111, a, b,
59041 );
59042 let e = _mm_set_ps(1., 2., 10., 2.);
59043 assert_eq_m128(r, e);
59044 }
59045
59046 #[simd_test(enable = "avx512f")]
59047 unsafe fn test_mm_maskz_sqrt_round_ss() {
59048 let a = _mm_set_ps(1., 2., 10., 20.);
59049 let b = _mm_set_ps(3., 4., 30., 4.);
59050 let r = _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
59051 let e = _mm_set_ps(1., 2., 10., 0.);
59052 assert_eq_m128(r, e);
59053 let r =
59054 _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
59055 let e = _mm_set_ps(1., 2., 10., 2.);
59056 assert_eq_m128(r, e);
59057 }
59058
59059 #[simd_test(enable = "avx512f")]
59060 unsafe fn test_mm_sqrt_round_sd() {
59061 let a = _mm_set_pd(1., 2.);
59062 let b = _mm_set_pd(3., 4.);
59063 let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
59064 let e = _mm_set_pd(1., 2.);
59065 assert_eq_m128d(r, e);
59066 }
59067
59068 #[simd_test(enable = "avx512f")]
59069 unsafe fn test_mm_mask_sqrt_round_sd() {
59070 let src = _mm_set_pd(10., 11.);
59071 let a = _mm_set_pd(1., 2.);
59072 let b = _mm_set_pd(3., 4.);
59073 let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
59074 let e = _mm_set_pd(1., 11.);
59075 assert_eq_m128d(r, e);
59076 let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
59077 src, 0b11111111, a, b,
59078 );
59079 let e = _mm_set_pd(1., 2.);
59080 assert_eq_m128d(r, e);
59081 }
59082
59083 #[simd_test(enable = "avx512f")]
59084 unsafe fn test_mm_maskz_sqrt_round_sd() {
59085 let a = _mm_set_pd(1., 2.);
59086 let b = _mm_set_pd(3., 4.);
59087 let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
59088 let e = _mm_set_pd(1., 0.);
59089 assert_eq_m128d(r, e);
59090 let r =
59091 _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
59092 let e = _mm_set_pd(1., 2.);
59093 assert_eq_m128d(r, e);
59094 }
59095
59096 #[simd_test(enable = "avx512f")]
59097 unsafe fn test_mm_getexp_round_ss() {
59098 let a = _mm_set1_ps(2.);
59099 let b = _mm_set1_ps(3.);
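        // getexp returns floor(log2(|b0|)) as a float: for b0 = 3. that is 1.; upper lanes are taken from `a`.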
59100 let r = _mm_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
59101 let e = _mm_set_ps(2., 2., 2., 1.);
59102 assert_eq_m128(r, e);
59103 }
59104
59105 #[simd_test(enable = "avx512f")]
59106 unsafe fn test_mm_mask_getexp_round_ss() {
59107 let a = _mm_set1_ps(2.);
59108 let b = _mm_set1_ps(3.);
59109 let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59110 let e = _mm_set_ps(2., 2., 2., 2.);
59111 assert_eq_m128(r, e);
59112 let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59113 let e = _mm_set_ps(2., 2., 2., 1.);
59114 assert_eq_m128(r, e);
59115 }
59116
59117 #[simd_test(enable = "avx512f")]
59118 unsafe fn test_mm_maskz_getexp_round_ss() {
59119 let a = _mm_set1_ps(2.);
59120 let b = _mm_set1_ps(3.);
59121 let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
59122 let e = _mm_set_ps(2., 2., 2., 0.);
59123 assert_eq_m128(r, e);
59124 let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59125 let e = _mm_set_ps(2., 2., 2., 1.);
59126 assert_eq_m128(r, e);
59127 }
59128
59129 #[simd_test(enable = "avx512f")]
59130 unsafe fn test_mm_getexp_round_sd() {
59131 let a = _mm_set1_pd(2.);
59132 let b = _mm_set1_pd(3.);
59133 let r = _mm_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
59134 let e = _mm_set_pd(2., 1.);
59135 assert_eq_m128d(r, e);
59136 }
59137
59138 #[simd_test(enable = "avx512f")]
59139 unsafe fn test_mm_mask_getexp_round_sd() {
59140 let a = _mm_set1_pd(2.);
59141 let b = _mm_set1_pd(3.);
59142 let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59143 let e = _mm_set_pd(2., 2.);
59144 assert_eq_m128d(r, e);
59145 let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59146 let e = _mm_set_pd(2., 1.);
59147 assert_eq_m128d(r, e);
59148 }
59149
59150 #[simd_test(enable = "avx512f")]
59151 unsafe fn test_mm_maskz_getexp_round_sd() {
59152 let a = _mm_set1_pd(2.);
59153 let b = _mm_set1_pd(3.);
59154 let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
59155 let e = _mm_set_pd(2., 0.);
59156 assert_eq_m128d(r, e);
59157 let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59158 let e = _mm_set_pd(2., 1.);
59159 assert_eq_m128d(r, e);
59160 }
59161
59162 #[simd_test(enable = "avx512f")]
59163 unsafe fn test_mm_getmant_round_ss() {
59164 let a = _mm_set1_ps(20.);
59165 let b = _mm_set1_ps(10.);
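        // _MM_MANT_NORM_1_2 normalizes the low lane of `b` into [1, 2): 10. = 1.25 * 2^3, so the mantissa is 1.25.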
59166 let r =
59167 _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
59168 a, b,
59169 );
59170 let e = _mm_set_ps(20., 20., 20., 1.25);
59171 assert_eq_m128(r, e);
59172 }
59173
59174 #[simd_test(enable = "avx512f")]
59175 unsafe fn test_mm_mask_getmant_round_ss() {
59176 let a = _mm_set1_ps(20.);
59177 let b = _mm_set1_ps(10.);
59178 let r = _mm_mask_getmant_round_ss::<
59179 _MM_MANT_NORM_1_2,
59180 _MM_MANT_SIGN_SRC,
59181 _MM_FROUND_CUR_DIRECTION,
59182 >(a, 0, a, b);
59183 let e = _mm_set_ps(20., 20., 20., 20.);
59184 assert_eq_m128(r, e);
59185 let r = _mm_mask_getmant_round_ss::<
59186 _MM_MANT_NORM_1_2,
59187 _MM_MANT_SIGN_SRC,
59188 _MM_FROUND_CUR_DIRECTION,
59189 >(a, 0b11111111, a, b);
59190 let e = _mm_set_ps(20., 20., 20., 1.25);
59191 assert_eq_m128(r, e);
59192 }
59193
59194 #[simd_test(enable = "avx512f")]
59195 unsafe fn test_mm_maskz_getmant_round_ss() {
59196 let a = _mm_set1_ps(20.);
59197 let b = _mm_set1_ps(10.);
59198 let r = _mm_maskz_getmant_round_ss::<
59199 _MM_MANT_NORM_1_2,
59200 _MM_MANT_SIGN_SRC,
59201 _MM_FROUND_CUR_DIRECTION,
59202 >(0, a, b);
59203 let e = _mm_set_ps(20., 20., 20., 0.);
59204 assert_eq_m128(r, e);
59205 let r = _mm_maskz_getmant_round_ss::<
59206 _MM_MANT_NORM_1_2,
59207 _MM_MANT_SIGN_SRC,
59208 _MM_FROUND_CUR_DIRECTION,
59209 >(0b11111111, a, b);
59210 let e = _mm_set_ps(20., 20., 20., 1.25);
59211 assert_eq_m128(r, e);
59212 }
59213
59214 #[simd_test(enable = "avx512f")]
59215 unsafe fn test_mm_getmant_round_sd() {
59216 let a = _mm_set1_pd(20.);
59217 let b = _mm_set1_pd(10.);
59218 let r =
59219 _mm_getmant_round_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
59220 a, b,
59221 );
59222 let e = _mm_set_pd(20., 1.25);
59223 assert_eq_m128d(r, e);
59224 }
59225
59226 #[simd_test(enable = "avx512f")]
59227 unsafe fn test_mm_mask_getmant_round_sd() {
59228 let a = _mm_set1_pd(20.);
59229 let b = _mm_set1_pd(10.);
59230 let r = _mm_mask_getmant_round_sd::<
59231 _MM_MANT_NORM_1_2,
59232 _MM_MANT_SIGN_SRC,
59233 _MM_FROUND_CUR_DIRECTION,
59234 >(a, 0, a, b);
59235 let e = _mm_set_pd(20., 20.);
59236 assert_eq_m128d(r, e);
59237 let r = _mm_mask_getmant_round_sd::<
59238 _MM_MANT_NORM_1_2,
59239 _MM_MANT_SIGN_SRC,
59240 _MM_FROUND_CUR_DIRECTION,
59241 >(a, 0b11111111, a, b);
59242 let e = _mm_set_pd(20., 1.25);
59243 assert_eq_m128d(r, e);
59244 }
59245
59246 #[simd_test(enable = "avx512f")]
59247 unsafe fn test_mm_maskz_getmant_round_sd() {
59248 let a = _mm_set1_pd(20.);
59249 let b = _mm_set1_pd(10.);
59250 let r = _mm_maskz_getmant_round_sd::<
59251 _MM_MANT_NORM_1_2,
59252 _MM_MANT_SIGN_SRC,
59253 _MM_FROUND_CUR_DIRECTION,
59254 >(0, a, b);
59255 let e = _mm_set_pd(20., 0.);
59256 assert_eq_m128d(r, e);
59257 let r = _mm_maskz_getmant_round_sd::<
59258 _MM_MANT_NORM_1_2,
59259 _MM_MANT_SIGN_SRC,
59260 _MM_FROUND_CUR_DIRECTION,
59261 >(0b11111111, a, b);
59262 let e = _mm_set_pd(20., 1.25);
59263 assert_eq_m128d(r, e);
59264 }
59265
59266 #[simd_test(enable = "avx512f")]
59267 unsafe fn test_mm_roundscale_round_ss() {
59268 let a = _mm_set1_ps(2.2);
59269 let b = _mm_set1_ps(1.1);
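        // IMM8 = 0 keeps zero fraction bits, i.e. rounds the low lane of `b` to the nearest integer: 1.1 -> 1.0.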
59270 let r = _mm_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
59271 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59272 assert_eq_m128(r, e);
59273 }
59274
59275 #[simd_test(enable = "avx512f")]
59276 unsafe fn test_mm_mask_roundscale_round_ss() {
59277 let a = _mm_set1_ps(2.2);
59278 let b = _mm_set1_ps(1.1);
59279 let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59280 let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
59281 assert_eq_m128(r, e);
59282 let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59283 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59284 assert_eq_m128(r, e);
59285 }
59286
59287 #[simd_test(enable = "avx512f")]
59288 unsafe fn test_mm_maskz_roundscale_round_ss() {
59289 let a = _mm_set1_ps(2.2);
59290 let b = _mm_set1_ps(1.1);
59291 let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
59292 let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
59293 assert_eq_m128(r, e);
59294 let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59295 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59296 assert_eq_m128(r, e);
59297 }
59298
59299 #[simd_test(enable = "avx512f")]
59300 unsafe fn test_mm_roundscale_round_sd() {
59301 let a = _mm_set1_pd(2.2);
59302 let b = _mm_set1_pd(1.1);
59303 let r = _mm_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
59304 let e = _mm_set_pd(2.2, 1.0);
59305 assert_eq_m128d(r, e);
59306 }
59307
59308 #[simd_test(enable = "avx512f")]
59309 unsafe fn test_mm_mask_roundscale_round_sd() {
59310 let a = _mm_set1_pd(2.2);
59311 let b = _mm_set1_pd(1.1);
59312 let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59313 let e = _mm_set_pd(2.2, 2.2);
59314 assert_eq_m128d(r, e);
59315 let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59316 let e = _mm_set_pd(2.2, 1.0);
59317 assert_eq_m128d(r, e);
59318 }
59319
59320 #[simd_test(enable = "avx512f")]
59321 unsafe fn test_mm_maskz_roundscale_round_sd() {
59322 let a = _mm_set1_pd(2.2);
59323 let b = _mm_set1_pd(1.1);
59324 let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
59325 let e = _mm_set_pd(2.2, 0.0);
59326 assert_eq_m128d(r, e);
59327 let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59328 let e = _mm_set_pd(2.2, 1.0);
59329 assert_eq_m128d(r, e);
59330 }
59331
59332 #[simd_test(enable = "avx512f")]
59333 unsafe fn test_mm_scalef_round_ss() {
59334 let a = _mm_set1_ps(1.);
59335 let b = _mm_set1_ps(3.);
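        // scalef computes a0 * 2^floor(b0) = 1. * 2^3 = 8. in the low lane; upper lanes come from `a`.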
59336 let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
59337 let e = _mm_set_ps(1., 1., 1., 8.);
59338 assert_eq_m128(r, e);
59339 }
59340
59341 #[simd_test(enable = "avx512f")]
59342 unsafe fn test_mm_mask_scalef_round_ss() {
59343 let a = _mm_set1_ps(1.);
59344 let b = _mm_set1_ps(3.);
59345 let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59346 a, 0, a, b,
59347 );
59348 let e = _mm_set_ps(1., 1., 1., 1.);
59349 assert_eq_m128(r, e);
59350 let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59351 a, 0b11111111, a, b,
59352 );
59353 let e = _mm_set_ps(1., 1., 1., 8.);
59354 assert_eq_m128(r, e);
59355 }
59356
59357 #[simd_test(enable = "avx512f")]
59358 unsafe fn test_mm_maskz_scalef_round_ss() {
59359 let a = _mm_set1_ps(1.);
59360 let b = _mm_set1_ps(3.);
59361 let r =
59362 _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
59363 let e = _mm_set_ps(1., 1., 1., 0.);
59364 assert_eq_m128(r, e);
59365 let r = _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59366 0b11111111, a, b,
59367 );
59368 let e = _mm_set_ps(1., 1., 1., 8.);
59369 assert_eq_m128(r, e);
59370 }
59371
59372 #[simd_test(enable = "avx512f")]
59373 unsafe fn test_mm_scalef_round_sd() {
59374 let a = _mm_set1_pd(1.);
59375 let b = _mm_set1_pd(3.);
59376 let r = _mm_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
59377 let e = _mm_set_pd(1., 8.);
59378 assert_eq_m128d(r, e);
59379 }
59380
59381 #[simd_test(enable = "avx512f")]
59382 unsafe fn test_mm_mask_scalef_round_sd() {
59383 let a = _mm_set1_pd(1.);
59384 let b = _mm_set1_pd(3.);
59385 let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59386 a, 0, a, b,
59387 );
59388 let e = _mm_set_pd(1., 1.);
59389 assert_eq_m128d(r, e);
59390 let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59391 a, 0b11111111, a, b,
59392 );
59393 let e = _mm_set_pd(1., 8.);
59394 assert_eq_m128d(r, e);
59395 }
59396
59397 #[simd_test(enable = "avx512f")]
59398 unsafe fn test_mm_maskz_scalef_round_sd() {
59399 let a = _mm_set1_pd(1.);
59400 let b = _mm_set1_pd(3.);
59401 let r =
59402 _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
59403 let e = _mm_set_pd(1., 0.);
59404 assert_eq_m128d(r, e);
59405 let r = _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59406 0b11111111, a, b,
59407 );
59408 let e = _mm_set_pd(1., 8.);
59409 assert_eq_m128d(r, e);
59410 }
59411
59412 #[simd_test(enable = "avx512f")]
59413 unsafe fn test_mm_fmadd_round_ss() {
59414 let a = _mm_set1_ps(1.);
59415 let b = _mm_set1_ps(2.);
59416 let c = _mm_set1_ps(3.);
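        // Fused multiply-add on the low lane: 1. * 2. + 3. = 5.; upper lanes are copied from `a`.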
59417 let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59418 let e = _mm_set_ps(1., 1., 1., 5.);
59419 assert_eq_m128(r, e);
59420 }
59421
59422 #[simd_test(enable = "avx512f")]
59423 unsafe fn test_mm_mask_fmadd_round_ss() {
59424 let a = _mm_set1_ps(1.);
59425 let b = _mm_set1_ps(2.);
59426 let c = _mm_set1_ps(3.);
59427 let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59428 a, 0, b, c,
59429 );
59430 assert_eq_m128(r, a);
59431 let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59432 a, 0b11111111, b, c,
59433 );
59434 let e = _mm_set_ps(1., 1., 1., 5.);
59435 assert_eq_m128(r, e);
59436 }
59437
59438 #[simd_test(enable = "avx512f")]
59439 unsafe fn test_mm_maskz_fmadd_round_ss() {
59440 let a = _mm_set1_ps(1.);
59441 let b = _mm_set1_ps(2.);
59442 let c = _mm_set1_ps(3.);
59443 let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59444 0, a, b, c,
59445 );
59446 let e = _mm_set_ps(1., 1., 1., 0.);
59447 assert_eq_m128(r, e);
59448 let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59449 0b11111111, a, b, c,
59450 );
59451 let e = _mm_set_ps(1., 1., 1., 5.);
59452 assert_eq_m128(r, e);
59453 }
59454
59455 #[simd_test(enable = "avx512f")]
59456 unsafe fn test_mm_mask3_fmadd_round_ss() {
59457 let a = _mm_set1_ps(1.);
59458 let b = _mm_set1_ps(2.);
59459 let c = _mm_set1_ps(3.);
59460 let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59461 a, b, c, 0,
59462 );
59463 assert_eq_m128(r, c);
59464 let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59465 a, b, c, 0b11111111,
59466 );
59467 let e = _mm_set_ps(3., 3., 3., 5.);
59468 assert_eq_m128(r, e);
59469 }
59470
59471 #[simd_test(enable = "avx512f")]
59472 unsafe fn test_mm_fmadd_round_sd() {
59473 let a = _mm_set1_pd(1.);
59474 let b = _mm_set1_pd(2.);
59475 let c = _mm_set1_pd(3.);
59476 let r = _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59477 let e = _mm_set_pd(1., 5.);
59478 assert_eq_m128d(r, e);
59479 }
59480
59481 #[simd_test(enable = "avx512f")]
59482 unsafe fn test_mm_mask_fmadd_round_sd() {
59483 let a = _mm_set1_pd(1.);
59484 let b = _mm_set1_pd(2.);
59485 let c = _mm_set1_pd(3.);
59486 let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59487 a, 0, b, c,
59488 );
59489 assert_eq_m128d(r, a);
59490 let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59491 a, 0b11111111, b, c,
59492 );
59493 let e = _mm_set_pd(1., 5.);
59494 assert_eq_m128d(r, e);
59495 }
59496
59497 #[simd_test(enable = "avx512f")]
59498 unsafe fn test_mm_maskz_fmadd_round_sd() {
59499 let a = _mm_set1_pd(1.);
59500 let b = _mm_set1_pd(2.);
59501 let c = _mm_set1_pd(3.);
59502 let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59503 0, a, b, c,
59504 );
59505 let e = _mm_set_pd(1., 0.);
59506 assert_eq_m128d(r, e);
59507 let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59508 0b11111111, a, b, c,
59509 );
59510 let e = _mm_set_pd(1., 5.);
59511 assert_eq_m128d(r, e);
59512 }
59513
59514 #[simd_test(enable = "avx512f")]
59515 unsafe fn test_mm_mask3_fmadd_round_sd() {
59516 let a = _mm_set1_pd(1.);
59517 let b = _mm_set1_pd(2.);
59518 let c = _mm_set1_pd(3.);
59519 let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59520 a, b, c, 0,
59521 );
59522 assert_eq_m128d(r, c);
59523 let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59524 a, b, c, 0b11111111,
59525 );
59526 let e = _mm_set_pd(3., 5.);
59527 assert_eq_m128d(r, e);
59528 }
59529
59530 #[simd_test(enable = "avx512f")]
59531 unsafe fn test_mm_fmsub_round_ss() {
59532 let a = _mm_set1_ps(1.);
59533 let b = _mm_set1_ps(2.);
59534 let c = _mm_set1_ps(3.);
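        // Low lane: 1. * 2. - 3. = -1.; upper lanes are copied from `a`.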
59535 let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59536 let e = _mm_set_ps(1., 1., 1., -1.);
59537 assert_eq_m128(r, e);
59538 }
59539
59540 #[simd_test(enable = "avx512f")]
59541 unsafe fn test_mm_mask_fmsub_round_ss() {
59542 let a = _mm_set1_ps(1.);
59543 let b = _mm_set1_ps(2.);
59544 let c = _mm_set1_ps(3.);
59545 let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59546 a, 0, b, c,
59547 );
59548 assert_eq_m128(r, a);
59549 let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59550 a, 0b11111111, b, c,
59551 );
59552 let e = _mm_set_ps(1., 1., 1., -1.);
59553 assert_eq_m128(r, e);
59554 }
59555
59556 #[simd_test(enable = "avx512f")]
59557 unsafe fn test_mm_maskz_fmsub_round_ss() {
59558 let a = _mm_set1_ps(1.);
59559 let b = _mm_set1_ps(2.);
59560 let c = _mm_set1_ps(3.);
59561 let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59562 0, a, b, c,
59563 );
59564 let e = _mm_set_ps(1., 1., 1., 0.);
59565 assert_eq_m128(r, e);
59566 let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59567 0b11111111, a, b, c,
59568 );
59569 let e = _mm_set_ps(1., 1., 1., -1.);
59570 assert_eq_m128(r, e);
59571 }
59572
59573 #[simd_test(enable = "avx512f")]
59574 unsafe fn test_mm_mask3_fmsub_round_ss() {
59575 let a = _mm_set1_ps(1.);
59576 let b = _mm_set1_ps(2.);
59577 let c = _mm_set1_ps(3.);
59578 let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59579 a, b, c, 0,
59580 );
59581 assert_eq_m128(r, c);
59582 let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59583 a, b, c, 0b11111111,
59584 );
59585 let e = _mm_set_ps(3., 3., 3., -1.);
59586 assert_eq_m128(r, e);
59587 }
59588
59589 #[simd_test(enable = "avx512f")]
59590 unsafe fn test_mm_fmsub_round_sd() {
59591 let a = _mm_set1_pd(1.);
59592 let b = _mm_set1_pd(2.);
59593 let c = _mm_set1_pd(3.);
59594 let r = _mm_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59595 let e = _mm_set_pd(1., -1.);
59596 assert_eq_m128d(r, e);
59597 }
59598
59599 #[simd_test(enable = "avx512f")]
59600 unsafe fn test_mm_mask_fmsub_round_sd() {
59601 let a = _mm_set1_pd(1.);
59602 let b = _mm_set1_pd(2.);
59603 let c = _mm_set1_pd(3.);
59604 let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59605 a, 0, b, c,
59606 );
59607 assert_eq_m128d(r, a);
59608 let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59609 a, 0b11111111, b, c,
59610 );
59611 let e = _mm_set_pd(1., -1.);
59612 assert_eq_m128d(r, e);
59613 }
59614
59615 #[simd_test(enable = "avx512f")]
59616 unsafe fn test_mm_maskz_fmsub_round_sd() {
59617 let a = _mm_set1_pd(1.);
59618 let b = _mm_set1_pd(2.);
59619 let c = _mm_set1_pd(3.);
59620 let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59621 0, a, b, c,
59622 );
59623 let e = _mm_set_pd(1., 0.);
59624 assert_eq_m128d(r, e);
59625 let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59626 0b11111111, a, b, c,
59627 );
59628 let e = _mm_set_pd(1., -1.);
59629 assert_eq_m128d(r, e);
59630 }
59631
59632 #[simd_test(enable = "avx512f")]
59633 unsafe fn test_mm_mask3_fmsub_round_sd() {
59634 let a = _mm_set1_pd(1.);
59635 let b = _mm_set1_pd(2.);
59636 let c = _mm_set1_pd(3.);
59637 let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59638 a, b, c, 0,
59639 );
59640 assert_eq_m128d(r, c);
59641 let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59642 a, b, c, 0b11111111,
59643 );
59644 let e = _mm_set_pd(3., -1.);
59645 assert_eq_m128d(r, e);
59646 }
59647
59648 #[simd_test(enable = "avx512f")]
59649 unsafe fn test_mm_fnmadd_round_ss() {
59650 let a = _mm_set1_ps(1.);
59651 let b = _mm_set1_ps(2.);
59652 let c = _mm_set1_ps(3.);
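        // Low lane: -(1. * 2.) + 3. = 1.; upper lanes are copied from `a`.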
59653 let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59654 let e = _mm_set_ps(1., 1., 1., 1.);
59655 assert_eq_m128(r, e);
59656 }
59657
59658 #[simd_test(enable = "avx512f")]
59659 unsafe fn test_mm_mask_fnmadd_round_ss() {
59660 let a = _mm_set1_ps(1.);
59661 let b = _mm_set1_ps(2.);
59662 let c = _mm_set1_ps(3.);
59663 let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59664 a, 0, b, c,
59665 );
59666 assert_eq_m128(r, a);
59667 let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59668 a, 0b11111111, b, c,
59669 );
59670 let e = _mm_set_ps(1., 1., 1., 1.);
59671 assert_eq_m128(r, e);
59672 }
59673
59674 #[simd_test(enable = "avx512f")]
59675 unsafe fn test_mm_maskz_fnmadd_round_ss() {
59676 let a = _mm_set1_ps(1.);
59677 let b = _mm_set1_ps(2.);
59678 let c = _mm_set1_ps(3.);
59679 let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59680 0, a, b, c,
59681 );
59682 let e = _mm_set_ps(1., 1., 1., 0.);
59683 assert_eq_m128(r, e);
59684 let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59685 0b11111111, a, b, c,
59686 );
59687 let e = _mm_set_ps(1., 1., 1., 1.);
59688 assert_eq_m128(r, e);
59689 }
59690
59691 #[simd_test(enable = "avx512f")]
59692 unsafe fn test_mm_mask3_fnmadd_round_ss() {
59693 let a = _mm_set1_ps(1.);
59694 let b = _mm_set1_ps(2.);
59695 let c = _mm_set1_ps(3.);
59696 let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59697 a, b, c, 0,
59698 );
59699 assert_eq_m128(r, c);
59700 let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59701 a, b, c, 0b11111111,
59702 );
59703 let e = _mm_set_ps(3., 3., 3., 1.);
59704 assert_eq_m128(r, e);
59705 }
59706
59707 #[simd_test(enable = "avx512f")]
59708 unsafe fn test_mm_fnmadd_round_sd() {
59709 let a = _mm_set1_pd(1.);
59710 let b = _mm_set1_pd(2.);
59711 let c = _mm_set1_pd(3.);
59712 let r = _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59713 let e = _mm_set_pd(1., 1.);
59714 assert_eq_m128d(r, e);
59715 }
59716
59717 #[simd_test(enable = "avx512f")]
59718 unsafe fn test_mm_mask_fnmadd_round_sd() {
59719 let a = _mm_set1_pd(1.);
59720 let b = _mm_set1_pd(2.);
59721 let c = _mm_set1_pd(3.);
59722 let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59723 a, 0, b, c,
59724 );
59725 assert_eq_m128d(r, a);
59726 let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59727 a, 0b11111111, b, c,
59728 );
59729 let e = _mm_set_pd(1., 1.);
59730 assert_eq_m128d(r, e);
59731 }
59732
59733 #[simd_test(enable = "avx512f")]
59734 unsafe fn test_mm_maskz_fnmadd_round_sd() {
59735 let a = _mm_set1_pd(1.);
59736 let b = _mm_set1_pd(2.);
59737 let c = _mm_set1_pd(3.);
59738 let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59739 0, a, b, c,
59740 );
59741 let e = _mm_set_pd(1., 0.);
59742 assert_eq_m128d(r, e);
59743 let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59744 0b11111111, a, b, c,
59745 );
59746 let e = _mm_set_pd(1., 1.);
59747 assert_eq_m128d(r, e);
59748 }
59749
59750 #[simd_test(enable = "avx512f")]
59751 unsafe fn test_mm_mask3_fnmadd_round_sd() {
59752 let a = _mm_set1_pd(1.);
59753 let b = _mm_set1_pd(2.);
59754 let c = _mm_set1_pd(3.);
59755 let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59756 a, b, c, 0,
59757 );
59758 assert_eq_m128d(r, c);
59759 let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59760 a, b, c, 0b11111111,
59761 );
59762 let e = _mm_set_pd(3., 1.);
59763 assert_eq_m128d(r, e);
59764 }
59765
59766 #[simd_test(enable = "avx512f")]
59767 unsafe fn test_mm_fnmsub_round_ss() {
59768 let a = _mm_set1_ps(1.);
59769 let b = _mm_set1_ps(2.);
59770 let c = _mm_set1_ps(3.);
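        // Low lane: -(1. * 2.) - 3. = -5.; upper lanes are copied from `a`.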
59771 let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59772 let e = _mm_set_ps(1., 1., 1., -5.);
59773 assert_eq_m128(r, e);
59774 }
59775
59776 #[simd_test(enable = "avx512f")]
59777 unsafe fn test_mm_mask_fnmsub_round_ss() {
59778 let a = _mm_set1_ps(1.);
59779 let b = _mm_set1_ps(2.);
59780 let c = _mm_set1_ps(3.);
59781 let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59782 a, 0, b, c,
59783 );
59784 assert_eq_m128(r, a);
59785 let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59786 a, 0b11111111, b, c,
59787 );
59788 let e = _mm_set_ps(1., 1., 1., -5.);
59789 assert_eq_m128(r, e);
59790 }
59791
59792 #[simd_test(enable = "avx512f")]
59793 unsafe fn test_mm_maskz_fnmsub_round_ss() {
59794 let a = _mm_set1_ps(1.);
59795 let b = _mm_set1_ps(2.);
59796 let c = _mm_set1_ps(3.);
59797 let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59798 0, a, b, c,
59799 );
59800 let e = _mm_set_ps(1., 1., 1., 0.);
59801 assert_eq_m128(r, e);
59802 let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59803 0b11111111, a, b, c,
59804 );
59805 let e = _mm_set_ps(1., 1., 1., -5.);
59806 assert_eq_m128(r, e);
59807 }
59808
59809 #[simd_test(enable = "avx512f")]
59810 unsafe fn test_mm_mask3_fnmsub_round_ss() {
59811 let a = _mm_set1_ps(1.);
59812 let b = _mm_set1_ps(2.);
59813 let c = _mm_set1_ps(3.);
59814 let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59815 a, b, c, 0,
59816 );
59817 assert_eq_m128(r, c);
59818 let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59819 a, b, c, 0b11111111,
59820 );
59821 let e = _mm_set_ps(3., 3., 3., -5.);
59822 assert_eq_m128(r, e);
59823 }
59824
59825 #[simd_test(enable = "avx512f")]
59826 unsafe fn test_mm_fnmsub_round_sd() {
59827 let a = _mm_set1_pd(1.);
59828 let b = _mm_set1_pd(2.);
59829 let c = _mm_set1_pd(3.);
59830 let r = _mm_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59831 let e = _mm_set_pd(1., -5.);
59832 assert_eq_m128d(r, e);
59833 }
59834
59835 #[simd_test(enable = "avx512f")]
59836 unsafe fn test_mm_mask_fnmsub_round_sd() {
59837 let a = _mm_set1_pd(1.);
59838 let b = _mm_set1_pd(2.);
59839 let c = _mm_set1_pd(3.);
59840 let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59841 a, 0, b, c,
59842 );
59843 assert_eq_m128d(r, a);
59844 let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59845 a, 0b11111111, b, c,
59846 );
59847 let e = _mm_set_pd(1., -5.);
59848 assert_eq_m128d(r, e);
59849 }
59850
59851 #[simd_test(enable = "avx512f")]
59852 unsafe fn test_mm_maskz_fnmsub_round_sd() {
59853 let a = _mm_set1_pd(1.);
59854 let b = _mm_set1_pd(2.);
59855 let c = _mm_set1_pd(3.);
59856 let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59857 0, a, b, c,
59858 );
59859 let e = _mm_set_pd(1., 0.);
59860 assert_eq_m128d(r, e);
59861 let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59862 0b11111111, a, b, c,
59863 );
59864 let e = _mm_set_pd(1., -5.);
59865 assert_eq_m128d(r, e);
59866 }
59867
59868 #[simd_test(enable = "avx512f")]
59869 unsafe fn test_mm_mask3_fnmsub_round_sd() {
59870 let a = _mm_set1_pd(1.);
59871 let b = _mm_set1_pd(2.);
59872 let c = _mm_set1_pd(3.);
59873 let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59874 a, b, c, 0,
59875 );
59876 assert_eq_m128d(r, c);
59877 let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59878 a, b, c, 0b11111111,
59879 );
59880 let e = _mm_set_pd(3., -5.);
59881 assert_eq_m128d(r, e);
59882 }
59883
59884 #[simd_test(enable = "avx512f")]
59885 unsafe fn test_mm_fixupimm_ss() {
59886 let a = _mm_set_ps(0., 0., 0., f32::NAN);
59887 let b = _mm_set1_ps(f32::MAX);
59888 let c = _mm_set1_epi32(i32::MAX);
59889 let r = _mm_fixupimm_ss::<5>(a, b, c);
59890 let e = _mm_set_ps(0., 0., 0., -0.0);
59891 assert_eq_m128(r, e);
59892 }
59893
59894 #[simd_test(enable = "avx512f")]
59895 unsafe fn test_mm_mask_fixupimm_ss() {
59896 let a = _mm_set_ps(0., 0., 0., f32::NAN);
59897 let b = _mm_set1_ps(f32::MAX);
59898 let c = _mm_set1_epi32(i32::MAX);
59899 let r = _mm_mask_fixupimm_ss::<5>(a, 0b11111111, b, c);
59900 let e = _mm_set_ps(0., 0., 0., -0.0);
59901 assert_eq_m128(r, e);
59902 }
59903
59904 #[simd_test(enable = "avx512f")]
59905 unsafe fn test_mm_maskz_fixupimm_ss() {
59906 let a = _mm_set_ps(0., 0., 0., f32::NAN);
59907 let b = _mm_set1_ps(f32::MAX);
59908 let c = _mm_set1_epi32(i32::MAX);
59909 let r = _mm_maskz_fixupimm_ss::<5>(0b00000000, a, b, c);
59910 let e = _mm_set_ps(0., 0., 0., 0.0);
59911 assert_eq_m128(r, e);
59912 let r = _mm_maskz_fixupimm_ss::<5>(0b11111111, a, b, c);
59913 let e = _mm_set_ps(0., 0., 0., -0.0);
59914 assert_eq_m128(r, e);
59915 }
59916
59917 #[simd_test(enable = "avx512f")]
59918 unsafe fn test_mm_fixupimm_sd() {
59919 let a = _mm_set_pd(0., f64::NAN);
59920 let b = _mm_set1_pd(f64::MAX);
59921 let c = _mm_set1_epi64x(i32::MAX as i64);
59922 let r = _mm_fixupimm_sd::<5>(a, b, c);
59923 let e = _mm_set_pd(0., -0.0);
59924 assert_eq_m128d(r, e);
59925 }
59926
59927 #[simd_test(enable = "avx512f")]
59928 unsafe fn test_mm_mask_fixupimm_sd() {
59929 let a = _mm_set_pd(0., f64::NAN);
59930 let b = _mm_set1_pd(f64::MAX);
59931 let c = _mm_set1_epi64x(i32::MAX as i64);
59932 let r = _mm_mask_fixupimm_sd::<5>(a, 0b11111111, b, c);
59933 let e = _mm_set_pd(0., -0.0);
59934 assert_eq_m128d(r, e);
59935 }
59936
59937 #[simd_test(enable = "avx512f")]
59938 unsafe fn test_mm_maskz_fixupimm_sd() {
59939 let a = _mm_set_pd(0., f64::NAN);
59940 let b = _mm_set1_pd(f64::MAX);
59941 let c = _mm_set1_epi64x(i32::MAX as i64);
59942 let r = _mm_maskz_fixupimm_sd::<5>(0b00000000, a, b, c);
59943 let e = _mm_set_pd(0., 0.0);
59944 assert_eq_m128d(r, e);
59945 let r = _mm_maskz_fixupimm_sd::<5>(0b11111111, a, b, c);
59946 let e = _mm_set_pd(0., -0.0);
59947 assert_eq_m128d(r, e);
59948 }
59949
59950 #[simd_test(enable = "avx512f")]
59951 unsafe fn test_mm_fixupimm_round_ss() {
59952 let a = _mm_set_ps(1., 0., 0., f32::NAN);
59953 let b = _mm_set1_ps(f32::MAX);
59954 let c = _mm_set1_epi32(i32::MAX);
59955 let r = _mm_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
59956 let e = _mm_set_ps(1., 0., 0., -0.0);
59957 assert_eq_m128(r, e);
59958 }
59959
59960 #[simd_test(enable = "avx512f")]
59961 unsafe fn test_mm_mask_fixupimm_round_ss() {
59962 let a = _mm_set_ps(0., 0., 0., f32::NAN);
59963 let b = _mm_set1_ps(f32::MAX);
59964 let c = _mm_set1_epi32(i32::MAX);
59965 let r = _mm_mask_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
59966 let e = _mm_set_ps(0., 0., 0., -0.0);
59967 assert_eq_m128(r, e);
59968 }
59969
59970 #[simd_test(enable = "avx512f")]
59971 unsafe fn test_mm_maskz_fixupimm_round_ss() {
59972 let a = _mm_set_ps(0., 0., 0., f32::NAN);
59973 let b = _mm_set1_ps(f32::MAX);
59974 let c = _mm_set1_epi32(i32::MAX);
59975 let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
59976 let e = _mm_set_ps(0., 0., 0., 0.0);
59977 assert_eq_m128(r, e);
59978 let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
59979 let e = _mm_set_ps(0., 0., 0., -0.0);
59980 assert_eq_m128(r, e);
59981 }
59982
59983 #[simd_test(enable = "avx512f")]
59984 unsafe fn test_mm_fixupimm_round_sd() {
59985 let a = _mm_set_pd(0., f64::NAN);
59986 let b = _mm_set1_pd(f64::MAX);
59987 let c = _mm_set1_epi64x(i32::MAX as i64);
59988 let r = _mm_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
59989 let e = _mm_set_pd(0., -0.0);
59990 assert_eq_m128d(r, e);
59991 }
59992
59993 #[simd_test(enable = "avx512f")]
59994 unsafe fn test_mm_mask_fixupimm_round_sd() {
59995 let a = _mm_set_pd(0., f64::NAN);
59996 let b = _mm_set1_pd(f64::MAX);
59997 let c = _mm_set1_epi64x(i32::MAX as i64);
59998 let r = _mm_mask_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
59999 let e = _mm_set_pd(0., -0.0);
60000 assert_eq_m128d(r, e);
60001 }
60002
60003 #[simd_test(enable = "avx512f")]
60004 unsafe fn test_mm_maskz_fixupimm_round_sd() {
60005 let a = _mm_set_pd(0., f64::NAN);
60006 let b = _mm_set1_pd(f64::MAX);
60007 let c = _mm_set1_epi64x(i32::MAX as i64);
60008 let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
60009 let e = _mm_set_pd(0., 0.0);
60010 assert_eq_m128d(r, e);
60011 let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
60012 let e = _mm_set_pd(0., -0.0);
60013 assert_eq_m128d(r, e);
60014 }
60015
60016 #[simd_test(enable = "avx512f")]
60017 unsafe fn test_mm_mask_cvtss_sd() {
60018 let a = _mm_set_pd(6., -7.5);
60019 let b = _mm_set_ps(0., -0.5, 1., -1.5);
60020 let r = _mm_mask_cvtss_sd(a, 0, a, b);
60021 assert_eq_m128d(r, a);
60022 let r = _mm_mask_cvtss_sd(a, 0b11111111, a, b);
60023 let e = _mm_set_pd(6., -1.5);
60024 assert_eq_m128d(r, e);
60025 }
60026
60027 #[simd_test(enable = "avx512f")]
60028 unsafe fn test_mm_maskz_cvtss_sd() {
60029 let a = _mm_set_pd(6., -7.5);
60030 let b = _mm_set_ps(0., -0.5, 1., -1.5);
60031 let r = _mm_maskz_cvtss_sd(0, a, b);
60032 let e = _mm_set_pd(6., 0.);
60033 assert_eq_m128d(r, e);
60034 let r = _mm_maskz_cvtss_sd(0b11111111, a, b);
60035 let e = _mm_set_pd(6., -1.5);
60036 assert_eq_m128d(r, e);
60037 }
60038
60039 #[simd_test(enable = "avx512f")]
60040 unsafe fn test_mm_mask_cvtsd_ss() {
60041 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60042 let b = _mm_set_pd(6., -7.5);
60043 let r = _mm_mask_cvtsd_ss(a, 0, a, b);
60044 assert_eq_m128(r, a);
60045 let r = _mm_mask_cvtsd_ss(a, 0b11111111, a, b);
60046 let e = _mm_set_ps(0., -0.5, 1., -7.5);
60047 assert_eq_m128(r, e);
60048 }
60049
60050 #[simd_test(enable = "avx512f")]
60051 unsafe fn test_mm_maskz_cvtsd_ss() {
60052 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60053 let b = _mm_set_pd(6., -7.5);
60054 let r = _mm_maskz_cvtsd_ss(0, a, b);
60055 let e = _mm_set_ps(0., -0.5, 1., 0.);
60056 assert_eq_m128(r, e);
60057 let r = _mm_maskz_cvtsd_ss(0b11111111, a, b);
60058 let e = _mm_set_ps(0., -0.5, 1., -7.5);
60059 assert_eq_m128(r, e);
60060 }
60061
60062 #[simd_test(enable = "avx512f")]
60063 unsafe fn test_mm_cvt_roundss_sd() {
60064 let a = _mm_set_pd(6., -7.5);
60065 let b = _mm_set_ps(0., -0.5, 1., -1.5);
60066 let r = _mm_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
60067 let e = _mm_set_pd(6., -1.5);
60068 assert_eq_m128d(r, e);
60069 }
60070
60071 #[simd_test(enable = "avx512f")]
60072 unsafe fn test_mm_mask_cvt_roundss_sd() {
60073 let a = _mm_set_pd(6., -7.5);
60074 let b = _mm_set_ps(0., -0.5, 1., -1.5);
60075 let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
60076 assert_eq_m128d(r, a);
60077 let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
60078 let e = _mm_set_pd(6., -1.5);
60079 assert_eq_m128d(r, e);
60080 }
60081
60082 #[simd_test(enable = "avx512f")]
60083 unsafe fn test_mm_maskz_cvt_roundss_sd() {
60084 let a = _mm_set_pd(6., -7.5);
60085 let b = _mm_set_ps(0., -0.5, 1., -1.5);
60086 let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
60087 let e = _mm_set_pd(6., 0.);
60088 assert_eq_m128d(r, e);
60089 let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
60090 let e = _mm_set_pd(6., -1.5);
60091 assert_eq_m128d(r, e);
60092 }
60093
60094 #[simd_test(enable = "avx512f")]
60095 unsafe fn test_mm_cvt_roundsd_ss() {
60096 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60097 let b = _mm_set_pd(6., -7.5);
60098 let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60099 let e = _mm_set_ps(0., -0.5, 1., -7.5);
60100 assert_eq_m128(r, e);
60101 }
60102
60103 #[simd_test(enable = "avx512f")]
60104 unsafe fn test_mm_mask_cvt_roundsd_ss() {
60105 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60106 let b = _mm_set_pd(6., -7.5);
60107 let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
60108 assert_eq_m128(r, a);
60109 let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60110 a, 0b11111111, a, b,
60111 );
60112 let e = _mm_set_ps(0., -0.5, 1., -7.5);
60113 assert_eq_m128(r, e);
60114 }
60115
60116 #[simd_test(enable = "avx512f")]
60117 unsafe fn test_mm_maskz_cvt_roundsd_ss() {
60118 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60119 let b = _mm_set_pd(6., -7.5);
60120 let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60121 let e = _mm_set_ps(0., -0.5, 1., 0.);
60122 assert_eq_m128(r, e);
60123 let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60124 0b11111111, a, b,
60125 );
60126 let e = _mm_set_ps(0., -0.5, 1., -7.5);
60127 assert_eq_m128(r, e);
60128 }
60129
60130 #[simd_test(enable = "avx512f")]
60131 unsafe fn test_mm_cvt_roundss_si32() {
60132 let a = _mm_set_ps(0., -0.5, 1., -1.5);
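        // _MM_FROUND_TO_ZERO truncates toward zero, so the low lane -1.5 converts to -1.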
60133 let r = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60134 let e: i32 = -1;
60135 assert_eq!(r, e);
60136 }
60137
60138 #[simd_test(enable = "avx512f")]
60139 unsafe fn test_mm_cvt_roundss_i32() {
60140 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60141 let r = _mm_cvt_roundss_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60142 let e: i32 = -1;
60143 assert_eq!(r, e);
60144 }
60145
60146 #[simd_test(enable = "avx512f")]
60147 unsafe fn test_mm_cvt_roundss_u32() {
60148 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60149 let r = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60150 let e: u32 = u32::MAX;
60151 assert_eq!(r, e);
60152 }
60153
60154 #[simd_test(enable = "avx512f")]
60155 unsafe fn test_mm_cvtss_i32() {
60156 let a = _mm_set_ps(0., -0.5, 1., -1.5);
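        // Default rounding (nearest, ties to even) converts the low lane -1.5 to -2.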
60157 let r = _mm_cvtss_i32(a);
60158 let e: i32 = -2;
60159 assert_eq!(r, e);
60160 }
60161
60162 #[simd_test(enable = "avx512f")]
60163 unsafe fn test_mm_cvtss_u32() {
60164 let a = _mm_set_ps(0., -0.5, 1., -1.5);
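        // -1.5 is not representable as u32; the conversion produces the unsigned integer
        // indefinite value, i.e. all bits set (u32::MAX).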
60165 let r = _mm_cvtss_u32(a);
60166 let e: u32 = u32::MAX;
60167 assert_eq!(r, e);
60168 }
60169
60170 #[simd_test(enable = "avx512f")]
60171 unsafe fn test_mm_cvt_roundsd_si32() {
60172 let a = _mm_set_pd(1., -1.5);
60173 let r = _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60174 let e: i32 = -1;
60175 assert_eq!(r, e);
60176 }
60177
60178 #[simd_test(enable = "avx512f")]
60179 unsafe fn test_mm_cvt_roundsd_i32() {
60180 let a = _mm_set_pd(1., -1.5);
60181 let r = _mm_cvt_roundsd_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60182 let e: i32 = -1;
60183 assert_eq!(r, e);
60184 }
60185
60186 #[simd_test(enable = "avx512f")]
60187 unsafe fn test_mm_cvt_roundsd_u32() {
60188 let a = _mm_set_pd(1., -1.5);
60189 let r = _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60190 let e: u32 = u32::MAX;
60191 assert_eq!(r, e);
60192 }
60193
60194 #[simd_test(enable = "avx512f")]
60195 unsafe fn test_mm_cvtsd_i32() {
60196 let a = _mm_set_pd(1., -1.5);
60197 let r = _mm_cvtsd_i32(a);
60198 let e: i32 = -2;
60199 assert_eq!(r, e);
60200 }
60201
60202 #[simd_test(enable = "avx512f")]
60203 unsafe fn test_mm_cvtsd_u32() {
60204 let a = _mm_set_pd(1., -1.5);
60205 let r = _mm_cvtsd_u32(a);
60206 let e: u32 = u32::MAX;
60207 assert_eq!(r, e);
60208 }
60209
60210 #[simd_test(enable = "avx512f")]
60211 unsafe fn test_mm_cvt_roundi32_ss() {
60212 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60213 let b: i32 = 9;
60214 let r = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60215 let e = _mm_set_ps(0., -0.5, 1., 9.);
60216 assert_eq_m128(r, e);
60217 }
60218
60219 #[simd_test(enable = "avx512f")]
60220 unsafe fn test_mm_cvt_roundsi32_ss() {
60221 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60222 let b: i32 = 9;
60223 let r = _mm_cvt_roundsi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60224 let e = _mm_set_ps(0., -0.5, 1., 9.);
60225 assert_eq_m128(r, e);
60226 }
60227
60228 #[simd_test(enable = "avx512f")]
60229 unsafe fn test_mm_cvt_roundu32_ss() {
60230 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60231 let b: u32 = 9;
60232 let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60233 let e = _mm_set_ps(0., -0.5, 1., 9.);
60234 assert_eq_m128(r, e);
60235 }
60236
60237 #[simd_test(enable = "avx512f")]
60238 unsafe fn test_mm_cvti32_ss() {
60239 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60240 let b: i32 = 9;
60241 let r = _mm_cvti32_ss(a, b);
60242 let e = _mm_set_ps(0., -0.5, 1., 9.);
60243 assert_eq_m128(r, e);
60244 }
60245
60246 #[simd_test(enable = "avx512f")]
60247 unsafe fn test_mm_cvti32_sd() {
60248 let a = _mm_set_pd(1., -1.5);
60249 let b: i32 = 9;
60250 let r = _mm_cvti32_sd(a, b);
60251 let e = _mm_set_pd(1., 9.);
60252 assert_eq_m128d(r, e);
60253 }
60254
60255 #[simd_test(enable = "avx512f")]
60256 unsafe fn test_mm_cvtt_roundss_si32() {
60257 let a = _mm_set_ps(0., -0.5, 1., -1.5);
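        // The cvtt forms always truncate; the rounding argument here only suppresses exceptions, so -1.5 -> -1.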
60258 let r = _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a);
60259 let e: i32 = -1;
60260 assert_eq!(r, e);
60261 }
60262
60263 #[simd_test(enable = "avx512f")]
60264 unsafe fn test_mm_cvtt_roundss_i32() {
60265 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60266 let r = _mm_cvtt_roundss_i32::<_MM_FROUND_NO_EXC>(a);
60267 let e: i32 = -1;
60268 assert_eq!(r, e);
60269 }
60270
60271 #[simd_test(enable = "avx512f")]
60272 unsafe fn test_mm_cvtt_roundss_u32() {
60273 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60274 let r = _mm_cvtt_roundss_u32::<_MM_FROUND_NO_EXC>(a);
60275 let e: u32 = u32::MAX;
60276 assert_eq!(r, e);
60277 }
60278
60279 #[simd_test(enable = "avx512f")]
60280 unsafe fn test_mm_cvttss_i32() {
60281 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60282 let r = _mm_cvttss_i32(a);
60283 let e: i32 = -1;
60284 assert_eq!(r, e);
60285 }
60286
60287 #[simd_test(enable = "avx512f")]
60288 unsafe fn test_mm_cvttss_u32() {
60289 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60290 let r = _mm_cvttss_u32(a);
60291 let e: u32 = u32::MAX;
60292 assert_eq!(r, e);
60293 }
60294
60295 #[simd_test(enable = "avx512f")]
60296 unsafe fn test_mm_cvtt_roundsd_si32() {
60297 let a = _mm_set_pd(1., -1.5);
60298 let r = _mm_cvtt_roundsd_si32::<_MM_FROUND_NO_EXC>(a);
60299 let e: i32 = -1;
60300 assert_eq!(r, e);
60301 }
60302
60303 #[simd_test(enable = "avx512f")]
60304 unsafe fn test_mm_cvtt_roundsd_i32() {
60305 let a = _mm_set_pd(1., -1.5);
60306 let r = _mm_cvtt_roundsd_i32::<_MM_FROUND_NO_EXC>(a);
60307 let e: i32 = -1;
60308 assert_eq!(r, e);
60309 }
60310
60311 #[simd_test(enable = "avx512f")]
60312 unsafe fn test_mm_cvtt_roundsd_u32() {
60313 let a = _mm_set_pd(1., -1.5);
60314 let r = _mm_cvtt_roundsd_u32::<_MM_FROUND_NO_EXC>(a);
60315 let e: u32 = u32::MAX;
60316 assert_eq!(r, e);
60317 }
60318
60319 #[simd_test(enable = "avx512f")]
60320 unsafe fn test_mm_cvttsd_i32() {
60321 let a = _mm_set_pd(1., -1.5);
60322 let r = _mm_cvttsd_i32(a);
60323 let e: i32 = -1;
60324 assert_eq!(r, e);
60325 }
60326
60327 #[simd_test(enable = "avx512f")]
60328 unsafe fn test_mm_cvttsd_u32() {
60329 let a = _mm_set_pd(1., -1.5);
60330 let r = _mm_cvttsd_u32(a);
60331 let e: u32 = u32::MAX;
60332 assert_eq!(r, e);
60333 }
60334
60335 #[simd_test(enable = "avx512f")]
60336 unsafe fn test_mm_cvtu32_ss() {
60337 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60338 let b: u32 = 9;
60339 let r = _mm_cvtu32_ss(a, b);
60340 let e = _mm_set_ps(0., -0.5, 1., 9.);
60341 assert_eq_m128(r, e);
60342 }
60343
60344 #[simd_test(enable = "avx512f")]
60345 unsafe fn test_mm_cvtu32_sd() {
60346 let a = _mm_set_pd(1., -1.5);
60347 let b: u32 = 9;
60348 let r = _mm_cvtu32_sd(a, b);
60349 let e = _mm_set_pd(1., 9.);
60350 assert_eq_m128d(r, e);
60351 }
60352
60353 #[simd_test(enable = "avx512f")]
60354 unsafe fn test_mm_comi_round_ss() {
60355 let a = _mm_set1_ps(2.2);
60356 let b = _mm_set1_ps(1.1);
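        // Predicate 0 is the "equal" comparison (_CMP_EQ_OQ); 2.2 == 1.1 is false, so the result is 0.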
60357 let r = _mm_comi_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
60358 let e: i32 = 0;
60359 assert_eq!(r, e);
60360 }
60361
60362 #[simd_test(enable = "avx512f")]
60363 unsafe fn test_mm_comi_round_sd() {
60364 let a = _mm_set1_pd(2.2);
60365 let b = _mm_set1_pd(1.1);
60366 let r = _mm_comi_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
60367 let e: i32 = 0;
60368 assert_eq!(r, e);
60369 }
60370
60371 #[simd_test(enable = "avx512f")]
60372 unsafe fn test_mm512_cvtsi512_si32() {
60373 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
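        // Returns the lowest 32-bit element of the vector, which `setr` places first: 1.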
60374 let r = _mm512_cvtsi512_si32(a);
60375 let e: i32 = 1;
60376 assert_eq!(r, e);
60377 }
60378
60379 #[simd_test(enable = "avx512f")]
60380 unsafe fn test_mm512_cvtss_f32() {
60381 let a = _mm512_setr_ps(
60382 312.0134, 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
60383 );
60384 assert_eq!(_mm512_cvtss_f32(a), 312.0134);
60385 }
60386
60387 #[simd_test(enable = "avx512f")]
60388 unsafe fn test_mm512_cvtsd_f64() {
60389 let r = _mm512_cvtsd_f64(_mm512_setr_pd(-1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8));
60390 assert_eq!(r, -1.1);
60391 }
60392
60393 #[simd_test(enable = "avx512f")]
60394 unsafe fn test_mm512_shuffle_pd() {
60395 let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
60396 let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
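        // With all control bits set, each 128-bit lane takes the upper element of `a` followed by the upper element of `b`.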
60397 let r = _mm512_shuffle_pd::<0b11_11_11_11>(a, b);
60398 let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
60399 assert_eq_m512d(r, e);
60400 }
60401
60402 #[simd_test(enable = "avx512f")]
60403 unsafe fn test_mm512_mask_shuffle_pd() {
60404 let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
60405 let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
60406 let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
60407 assert_eq_m512d(r, a);
60408 let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0b11111111, a, b);
60409 let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
60410 assert_eq_m512d(r, e);
60411 }
60412
60413 #[simd_test(enable = "avx512f")]
60414 unsafe fn test_mm512_maskz_shuffle_pd() {
60415 let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
60416 let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
60417 let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
60418 assert_eq_m512d(r, _mm512_setzero_pd());
60419 let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b);
60420 let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.);
60421 assert_eq_m512d(r, e);
60422 }
60423
60424 #[simd_test(enable = "avx512f")]
60425 unsafe fn test_mm512_mask_expandloadu_epi32() {
60426 let src = _mm512_set1_epi32(42);
60427 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
60428 let p = a.as_ptr();
60429 let m = 0b11101000_11001010;
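        // Expand-load reads consecutive values from `p` and scatters them to the lanes whose mask bit
        // is set (lowest bit = lane 0); masked-off lanes keep the corresponding element of `src`.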
60430 let r = _mm512_mask_expandloadu_epi32(src, m, black_box(p));
60431 let e = _mm512_set_epi32(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
60432 assert_eq_m512i(r, e);
60433 }
60434
60435 #[simd_test(enable = "avx512f")]
60436 unsafe fn test_mm512_maskz_expandloadu_epi32() {
60437 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
60438 let p = a.as_ptr();
60439 let m = 0b11101000_11001010;
60440 let r = _mm512_maskz_expandloadu_epi32(m, black_box(p));
60441 let e = _mm512_set_epi32(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
60442 assert_eq_m512i(r, e);
60443 }
60444
60445 #[simd_test(enable = "avx512f,avx512vl")]
60446 unsafe fn test_mm256_mask_expandloadu_epi32() {
60447 let src = _mm256_set1_epi32(42);
60448 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
60449 let p = a.as_ptr();
60450 let m = 0b11101000;
60451 let r = _mm256_mask_expandloadu_epi32(src, m, black_box(p));
60452 let e = _mm256_set_epi32(4, 3, 2, 42, 1, 42, 42, 42);
60453 assert_eq_m256i(r, e);
60454 }
60455
60456 #[simd_test(enable = "avx512f,avx512vl")]
60457 unsafe fn test_mm256_maskz_expandloadu_epi32() {
60458 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
60459 let p = a.as_ptr();
60460 let m = 0b11101000;
60461 let r = _mm256_maskz_expandloadu_epi32(m, black_box(p));
60462 let e = _mm256_set_epi32(4, 3, 2, 0, 1, 0, 0, 0);
60463 assert_eq_m256i(r, e);
60464 }
60465
60466 #[simd_test(enable = "avx512f,avx512vl")]
60467 unsafe fn test_mm_mask_expandloadu_epi32() {
60468 let src = _mm_set1_epi32(42);
60469 let a = &[1_i32, 2, 3, 4];
60470 let p = a.as_ptr();
60471 let m = 0b11111000;
60472 let r = _mm_mask_expandloadu_epi32(src, m, black_box(p));
60473 let e = _mm_set_epi32(1, 42, 42, 42);
60474 assert_eq_m128i(r, e);
60475 }
60476
60477 #[simd_test(enable = "avx512f,avx512vl")]
60478 unsafe fn test_mm_maskz_expandloadu_epi32() {
60479 let a = &[1_i32, 2, 3, 4];
60480 let p = a.as_ptr();
60481 let m = 0b11111000;
60482 let r = _mm_maskz_expandloadu_epi32(m, black_box(p));
60483 let e = _mm_set_epi32(1, 0, 0, 0);
60484 assert_eq_m128i(r, e);
60485 }
60486
60487 #[simd_test(enable = "avx512f")]
60488 unsafe fn test_mm512_mask_expandloadu_epi64() {
60489 let src = _mm512_set1_epi64(42);
60490 let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
60491 let p = a.as_ptr();
60492 let m = 0b11101000;
60493 let r = _mm512_mask_expandloadu_epi64(src, m, black_box(p));
60494 let e = _mm512_set_epi64(4, 3, 2, 42, 1, 42, 42, 42);
60495 assert_eq_m512i(r, e);
60496 }
60497
60498 #[simd_test(enable = "avx512f")]
60499 unsafe fn test_mm512_maskz_expandloadu_epi64() {
60500 let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
60501 let p = a.as_ptr();
60502 let m = 0b11101000;
60503 let r = _mm512_maskz_expandloadu_epi64(m, black_box(p));
60504 let e = _mm512_set_epi64(4, 3, 2, 0, 1, 0, 0, 0);
60505 assert_eq_m512i(r, e);
60506 }
60507
60508 #[simd_test(enable = "avx512f,avx512vl")]
60509 unsafe fn test_mm256_mask_expandloadu_epi64() {
60510 let src = _mm256_set1_epi64x(42);
60511 let a = &[1_i64, 2, 3, 4];
60512 let p = a.as_ptr();
60513 let m = 0b11101000;
60514 let r = _mm256_mask_expandloadu_epi64(src, m, black_box(p));
60515 let e = _mm256_set_epi64x(1, 42, 42, 42);
60516 assert_eq_m256i(r, e);
60517 }
60518
60519 #[simd_test(enable = "avx512f,avx512vl")]
60520 unsafe fn test_mm256_maskz_expandloadu_epi64() {
60521 let a = &[1_i64, 2, 3, 4];
60522 let p = a.as_ptr();
60523 let m = 0b11101000;
60524 let r = _mm256_maskz_expandloadu_epi64(m, black_box(p));
60525 let e = _mm256_set_epi64x(1, 0, 0, 0);
60526 assert_eq_m256i(r, e);
60527 }
60528
60529 #[simd_test(enable = "avx512f,avx512vl")]
60530 unsafe fn test_mm_mask_expandloadu_epi64() {
60531 let src = _mm_set1_epi64x(42);
60532 let a = &[1_i64, 2];
60533 let p = a.as_ptr();
60534 let m = 0b11101000;
60535 let r = _mm_mask_expandloadu_epi64(src, m, black_box(p));
60536 let e = _mm_set_epi64x(42, 42);
60537 assert_eq_m128i(r, e);
60538 }
60539
60540 #[simd_test(enable = "avx512f,avx512vl")]
60541 unsafe fn test_mm_maskz_expandloadu_epi64() {
60542 let a = &[1_i64, 2];
60543 let p = a.as_ptr();
60544 let m = 0b11101000;
60545 let r = _mm_maskz_expandloadu_epi64(m, black_box(p));
60546 let e = _mm_set_epi64x(0, 0);
60547 assert_eq_m128i(r, e);
60548 }
60549
60550 #[simd_test(enable = "avx512f")]
60551 unsafe fn test_mm512_mask_expandloadu_ps() {
60552 let src = _mm512_set1_ps(42.);
60553 let a = &[
60554 1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
60555 ];
60556 let p = a.as_ptr();
60557 let m = 0b11101000_11001010;
        let r = _mm512_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm512_set_ps(
            8., 7., 6., 42., 5., 42., 42., 42., 4., 3., 42., 42., 2., 42., 1., 42.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_ps() {
        let a = &[
            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm512_set_ps(
            8., 7., 6., 0., 5., 0., 0., 0., 4., 3., 0., 0., 2., 0., 1., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_ps() {
        let src = _mm256_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm256_set_ps(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm256_set_ps(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_ps() {
        let src = _mm_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm_set_ps(1., 42., 42., 42.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm_set_ps(1., 0., 0., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_pd() {
        let src = _mm512_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
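        // As in the epi64 case, lanes 3, 5, 6 and 7 receive 1.0, 2.0, 3.0, 4.0; the
        // other lanes keep 42.0 from `src`.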
        let r = _mm512_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm512_set_pd(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm512_set_pd(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_pd() {
        let src = _mm256_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm256_set_pd(1., 42., 42., 42.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm256_set_pd(1., 0., 0., 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_pd() {
        let src = _mm_set1_pd(42.);
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm_set_pd(42., 42.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
    }
}