use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    mem::transmute,
};

// And //

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_pd&ig_expand=288)
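///
/// # Examples
///
/// A minimal sketch of the writemask behavior (illustrative values; assumes the calling
/// context enables the `avx512dq` and `avx512vl` target features, e.g. via `#[target_feature]`):
///
/// ```ignore
/// let src = _mm_set1_pd(9.0);
/// let a = _mm_set1_pd(-5.0);
/// let b = _mm_set1_pd(5.0);
/// // Mask bit 0 is set, bit 1 is clear: lane 0 receives the bitwise AND of `a` and `b`,
/// // lane 1 is copied unchanged from `src`.
/// let r = _mm_mask_and_pd(src, 0b01, a, b);
/// ```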
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_and_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let and = _mm_and_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, and, src.as_f64x2()))
    }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_pd&ig_expand=289)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let and = _mm_and_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, and, f64x2::ZERO))
    }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_pd&ig_expand=291)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_and_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let and = _mm256_and_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, and, src.as_f64x4()))
    }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_pd&ig_expand=292)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let and = _mm256_and_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, and, f64x4::ZERO))
    }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_pd&ig_expand=293)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandp))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_and(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_pd&ig_expand=294)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_and_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let and = _mm512_and_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, and, src.as_f64x8()))
    }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_pd&ig_expand=295)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_and_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let and = _mm512_and_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, and, f64x8::ZERO))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_ps&ig_expand=297)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_and_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let and = _mm_and_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, and, src.as_f32x4()))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_ps&ig_expand=298)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_and_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let and = _mm_and_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, and, f32x4::ZERO))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_ps&ig_expand=300)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_and_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let and = _mm256_and_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, and, src.as_f32x8()))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_ps&ig_expand=301)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_and_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let and = _mm256_and_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, and, f32x8::ZERO))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_ps&ig_expand=303)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_and_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(simd_and(
            transmute::<_, u32x16>(a),
            transmute::<_, u32x16>(b),
        ))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_ps&ig_expand=304)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_and_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let and = _mm512_and_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, and, src.as_f32x16()))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_ps&ig_expand=305)
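///
/// # Examples
///
/// A minimal zeromask sketch (illustrative values; assumes the calling context enables the
/// `avx512dq` target feature):
///
/// ```ignore
/// let a = _mm512_set1_ps(-4.0);
/// let b = _mm512_set1_ps(4.0);
/// // Only the lower eight lanes have their mask bit set; they receive the bitwise AND
/// // of `a` and `b`, while the upper eight lanes are zeroed.
/// let r = _mm512_maskz_and_ps(0b0000_0000_1111_1111, a, b);
/// ```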
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_and_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let and = _mm512_and_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, and, f32x16::ZERO))
    }
}

// Andnot

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_pd&ig_expand=326)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_andnot_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let andnot = _mm_andnot_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, andnot, src.as_f64x2()))
    }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_pd&ig_expand=327)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_andnot_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let andnot = _mm_andnot_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, andnot, f64x2::ZERO))
    }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_pd&ig_expand=329)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_andnot_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let andnot = _mm256_andnot_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, andnot, src.as_f64x4()))
    }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_pd&ig_expand=330)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let andnot = _mm256_andnot_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, andnot, f64x4::ZERO))
    }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_pd&ig_expand=331)
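///
/// # Examples
///
/// `_mm512_andnot_pd(a, b)` computes `(!a) & b` on the bit patterns. A common use is clearing
/// the sign bit to take an absolute value (a sketch with illustrative values; assumes the
/// calling context enables the `avx512dq` target feature):
///
/// ```ignore
/// let sign_bit = _mm512_set1_pd(-0.0);
/// let x = _mm512_set1_pd(-3.5);
/// // (!sign_bit) & x clears the sign bit of every lane, giving 3.5 in each lane.
/// let abs = _mm512_andnot_pd(sign_bit, x);
/// ```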
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnp))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { _mm512_and_pd(_mm512_xor_pd(a, transmute(_mm512_set1_epi64(-1))), b) }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_pd&ig_expand=332)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_andnot_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let andnot = _mm512_andnot_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, andnot, src.as_f64x8()))
    }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_pd&ig_expand=333)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_andnot_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let andnot = _mm512_andnot_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, andnot, f64x8::ZERO))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_ps&ig_expand=335)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_andnot_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let andnot = _mm_andnot_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, andnot, src.as_f32x4()))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_ps&ig_expand=336)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_andnot_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let andnot = _mm_andnot_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, andnot, f32x4::ZERO))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_ps&ig_expand=338)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_andnot_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let andnot = _mm256_andnot_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, andnot, src.as_f32x8()))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_ps&ig_expand=339)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_andnot_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let andnot = _mm256_andnot_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, andnot, f32x8::ZERO))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_ps&ig_expand=340)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_andnot_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { _mm512_and_ps(_mm512_xor_ps(a, transmute(_mm512_set1_epi32(-1))), b) }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_ps&ig_expand=341)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_andnot_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let andnot = _mm512_andnot_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, andnot, src.as_f32x16()))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_ps&ig_expand=342)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_andnot_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let andnot = _mm512_andnot_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, andnot, f32x16::ZERO))
    }
}

// Or

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_pd&ig_expand=4824)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_or_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let or = _mm_or_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, or, src.as_f64x2()))
    }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_pd&ig_expand=4825)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_or_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let or = _mm_or_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, or, f64x2::ZERO))
    }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_pd&ig_expand=4827)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_or_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let or = _mm256_or_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, or, src.as_f64x4()))
    }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_pd&ig_expand=4828)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let or = _mm256_or_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, or, f64x4::ZERO))
    }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_pd&ig_expand=4829)
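///
/// # Examples
///
/// OR-ing with `-0.0` sets the sign bit of every lane, producing `-|x|` (a sketch with
/// illustrative values; assumes the calling context enables the `avx512dq` target feature):
///
/// ```ignore
/// let sign_bit = _mm512_set1_pd(-0.0);
/// let x = _mm512_set1_pd(3.5);
/// // 3.5 | -0.0 keeps the magnitude and sets the sign bit, giving -3.5 in every lane.
/// let neg_abs = _mm512_or_pd(x, sign_bit);
/// ```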
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorp))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_or(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_pd&ig_expand=4830)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_or_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let or = _mm512_or_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, or, src.as_f64x8()))
    }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_pd&ig_expand=4831)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_or_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let or = _mm512_or_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, or, f64x8::ZERO))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_ps&ig_expand=4833)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_or_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let or = _mm_or_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, or, src.as_f32x4()))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_ps&ig_expand=4834)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_or_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let or = _mm_or_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, or, f32x4::ZERO))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_ps&ig_expand=4836)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_or_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let or = _mm256_or_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, or, src.as_f32x8()))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_ps&ig_expand=4837)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_or_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let or = _mm256_or_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, or, f32x8::ZERO))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_ps&ig_expand=4838)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_or_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(simd_or(
            transmute::<_, u32x16>(a),
            transmute::<_, u32x16>(b),
        ))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_ps&ig_expand=4839)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_or_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let or = _mm512_or_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, or, src.as_f32x16()))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_ps&ig_expand=4840)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_or_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let or = _mm512_or_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, or, f32x16::ZERO))
    }
}

// Xor

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_pd&ig_expand=7094)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_xor_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let xor = _mm_xor_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, xor, src.as_f64x2()))
    }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_pd&ig_expand=7095)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_xor_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let xor = _mm_xor_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, xor, f64x2::ZERO))
    }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_pd&ig_expand=7097)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_xor_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let xor = _mm256_xor_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, xor, src.as_f64x4()))
    }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_pd&ig_expand=7098)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let xor = _mm256_xor_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, xor, f64x4::ZERO))
    }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_pd&ig_expand=7102)
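///
/// # Examples
///
/// XOR-ing with `-0.0` flips the sign bit, negating every lane (a sketch with illustrative
/// values; assumes the calling context enables the `avx512dq` target feature):
///
/// ```ignore
/// let sign_bit = _mm512_set1_pd(-0.0);
/// let x = _mm512_set1_pd(2.0);
/// // 2.0 ^ -0.0 flips only the sign bit, giving -2.0 in every lane.
/// let neg = _mm512_xor_pd(x, sign_bit);
/// ```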
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorp))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_xor(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_pd&ig_expand=7100)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_xor_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let xor = _mm512_xor_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, xor, src.as_f64x8()))
    }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_pd&ig_expand=7101)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_xor_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let xor = _mm512_xor_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, xor, f64x8::ZERO))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_ps&ig_expand=7103)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_xor_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let xor = _mm_xor_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, xor, src.as_f32x4()))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_ps&ig_expand=7104)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_xor_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let xor = _mm_xor_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, xor, f32x4::ZERO))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_ps&ig_expand=7106)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_xor_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let xor = _mm256_xor_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, xor, src.as_f32x8()))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_ps&ig_expand=7107)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_xor_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let xor = _mm256_xor_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, xor, f32x8::ZERO))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_ps&ig_expand=7111)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_xor_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(simd_xor(
            transmute::<_, u32x16>(a),
            transmute::<_, u32x16>(b),
        ))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_ps&ig_expand=7109)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_xor_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let xor = _mm512_xor_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, xor, src.as_f32x16()))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_ps&ig_expand=7110)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_xor_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let xor = _mm512_xor_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, xor, f32x16::ZERO))
    }
}

// Broadcast

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x2&ig_expand=509)
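///
/// # Examples
///
/// A sketch of the resulting lane pattern (illustrative values; assumes the calling context
/// enables the `avx512dq` and `avx512vl` target features):
///
/// ```ignore
/// let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lanes, low to high: [1.0, 2.0, 3.0, 4.0]
/// // The lower two lanes repeat across dst: [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0].
/// let r = _mm256_broadcast_f32x2(a);
/// ```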
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcast_f32x2(a: __m128) -> __m256 {
    unsafe {
        let b: f32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x2&ig_expand=510)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcast_f32x2(src: __m256, k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let b = _mm256_broadcast_f32x2(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
    }
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x2&ig_expand=511)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcast_f32x2(k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let b = _mm256_broadcast_f32x2(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
    }
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x2&ig_expand=512)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_f32x2(a: __m128) -> __m512 {
    unsafe {
        let b: f32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x2&ig_expand=513)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcast_f32x2(src: __m512, k: __mmask16, a: __m128) -> __m512 {
    unsafe {
        let b = _mm512_broadcast_f32x2(a).as_f32x16();
        transmute(simd_select_bitmask(k, b, src.as_f32x16()))
    }
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x2&ig_expand=514)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcast_f32x2(k: __mmask16, a: __m128) -> __m512 {
    unsafe {
        let b = _mm512_broadcast_f32x2(a).as_f32x16();
        transmute(simd_select_bitmask(k, b, f32x16::ZERO))
    }
}

/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x8&ig_expand=521)
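///
/// # Examples
///
/// A sketch of the lane pattern (illustrative values; assumes the calling context enables the
/// `avx512dq` target feature):
///
/// ```ignore
/// let a = _mm256_setr_ps(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
/// // All eight lanes of `a` are repeated twice across the 512-bit result:
/// // [0.0, 1.0, ..., 7.0, 0.0, 1.0, ..., 7.0].
/// let r = _mm512_broadcast_f32x8(a);
/// ```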
1032#[inline]
1033#[target_feature(enable = "avx512dq")]
1034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1035#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1036pub const fn _mm512_broadcast_f32x8(a: __m256) -> __m512 {
1037 unsafe {
1038 let b: f32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
1039 transmute(src:b)
1040 }
1041}
1042
1043/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
1044/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
1045///
1046/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x8&ig_expand=522)
1047#[inline]
1048#[target_feature(enable = "avx512dq")]
1049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1051pub const fn _mm512_mask_broadcast_f32x8(src: __m512, k: __mmask16, a: __m256) -> __m512 {
1052 unsafe {
1053 let b: Simd = _mm512_broadcast_f32x8(a).as_f32x16();
1054 transmute(src:simd_select_bitmask(m:k, yes:b, no:src.as_f32x16()))
1055 }
1056}
1057
1058/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
1059/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
1060///
1061/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x8&ig_expand=523)
1062#[inline]
1063#[target_feature(enable = "avx512dq")]
1064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1065#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1066pub const fn _mm512_maskz_broadcast_f32x8(k: __mmask16, a: __m256) -> __m512 {
1067 unsafe {
1068 let b: Simd = _mm512_broadcast_f32x8(a).as_f32x16();
1069 transmute(src:simd_select_bitmask(m:k, yes:b, no:f32x16::ZERO))
1070 }
1071}
1072
1073/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
1074/// elements of dst.
1075///
1076/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f64x2&ig_expand=524)
1077#[inline]
1078#[target_feature(enable = "avx512dq,avx512vl")]
1079#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1080#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1081pub const fn _mm256_broadcast_f64x2(a: __m128d) -> __m256d {
1082 unsafe {
1083 let b: f64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
1084 transmute(src:b)
1085 }
1086}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f64x2&ig_expand=525)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcast_f64x2(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
    unsafe {
        let b = _mm256_broadcast_f64x2(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, src.as_f64x4()))
    }
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f64x2&ig_expand=526)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m256d {
    unsafe {
        let b = _mm256_broadcast_f64x2(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, f64x4::ZERO))
    }
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x2&ig_expand=527)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_f64x2(a: __m128d) -> __m512d {
    unsafe {
        let b: f64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x2&ig_expand=528)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcast_f64x2(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
    unsafe {
        let b = _mm512_broadcast_f64x2(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
    }
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x2&ig_expand=529)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m512d {
    unsafe {
        let b = _mm512_broadcast_f64x2(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
    }
}
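
// Illustrative sketch (an assumed helper for exposition only): with zeromask 0b1010_1010 the
// odd-numbered lanes keep the broadcast values and the even-numbered lanes are zeroed.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512dq")]
unsafe fn _demo_mm512_maskz_broadcast_f64x2() {
    let a = _mm_set_pd(2.0, 1.0); // lanes: [1.0, 2.0]
    let r = _mm512_maskz_broadcast_f64x2(0b1010_1010, a);
    let mut out = [0.0f64; 8];
    unsafe { _mm512_storeu_pd(out.as_mut_ptr(), r) };
    assert_eq!(out, [0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0]);
}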

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcast_i32x2&ig_expand=533)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcast_i32x2(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i32x4();
        let b: i32x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
        transmute(b)
    }
}
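
// Illustrative sketch (an assumed helper, for exposition only): only the two lowest 32-bit
// lanes of `a` are used; they are repeated across the whole 128-bit result.
#[cfg(test)]
#[target_feature(enable = "avx512dq,avx512vl")]
unsafe fn _demo_mm_broadcast_i32x2() {
    let a = _mm_set_epi32(4, 3, 2, 1); // lanes: [1, 2, 3, 4]
    let r = _mm_broadcast_i32x2(a); // lanes: [1, 2, 1, 2]
    let mut out = [0i32; 4];
    unsafe { _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r) };
    assert_eq!(out, [1, 2, 1, 2]);
}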

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcast_i32x2&ig_expand=534)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_broadcast_i32x2(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let b = _mm_broadcast_i32x2(a).as_i32x4();
        transmute(simd_select_bitmask(k, b, src.as_i32x4()))
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcast_i32x2&ig_expand=535)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let b = _mm_broadcast_i32x2(a).as_i32x4();
        transmute(simd_select_bitmask(k, b, i32x4::ZERO))
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x2&ig_expand=536)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcast_i32x2(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i32x4();
        let b: i32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x2&ig_expand=537)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcast_i32x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let b = _mm256_broadcast_i32x2(a).as_i32x8();
        transmute(simd_select_bitmask(k, b, src.as_i32x8()))
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x2&ig_expand=538)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let b = _mm256_broadcast_i32x2(a).as_i32x8();
        transmute(simd_select_bitmask(k, b, i32x8::ZERO))
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x2&ig_expand=539)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_i32x2(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i32x4();
        let b: i32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x2&ig_expand=540)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcast_i32x2(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i32x2(a).as_i32x16();
        transmute(simd_select_bitmask(k, b, src.as_i32x16()))
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x2&ig_expand=541)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcast_i32x2(k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i32x2(a).as_i32x16();
        transmute(simd_select_bitmask(k, b, i32x16::ZERO))
    }
}

/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x8&ig_expand=548)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_i32x8(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_i32x8();
        let b: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
        transmute(b)
    }
}
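
// Illustrative sketch (an assumed helper, for exposition only): the eight 32-bit lanes of `a`
// fill both 256-bit halves of the result; the upper half is read back with the extract intrinsic.
#[cfg(test)]
#[target_feature(enable = "avx512dq")]
unsafe fn _demo_mm512_broadcast_i32x8() {
    let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    let r = _mm512_broadcast_i32x8(a);
    let hi = _mm512_extracti32x8_epi32::<1>(r); // upper 256 bits of r
    let mut out = [0i32; 8];
    unsafe { _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, hi) };
    assert_eq!(out, [0, 1, 2, 3, 4, 5, 6, 7]);
}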

/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x8&ig_expand=549)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcast_i32x8(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i32x8(a).as_i32x16();
        transmute(simd_select_bitmask(k, b, src.as_i32x16()))
    }
}

/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x8&ig_expand=550)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcast_i32x8(k: __mmask16, a: __m256i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i32x8(a).as_i32x16();
        transmute(simd_select_bitmask(k, b, i32x16::ZERO))
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i64x2&ig_expand=551)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcast_i64x2(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i64x2();
        let b: i64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
        transmute(b)
    }
}
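
// Illustrative sketch (an assumed helper, for exposition only): the two 64-bit lanes of `a`
// are repeated into both 128-bit halves of the 256-bit result.
#[cfg(test)]
#[target_feature(enable = "avx512dq,avx512vl")]
unsafe fn _demo_mm256_broadcast_i64x2() {
    let a = _mm_set_epi64x(20, 10); // lanes: [10, 20]
    let r = _mm256_broadcast_i64x2(a); // lanes: [10, 20, 10, 20]
    let mut out = [0i64; 4];
    unsafe { _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r) };
    assert_eq!(out, [10, 20, 10, 20]);
}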

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i64x2&ig_expand=552)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcast_i64x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let b = _mm256_broadcast_i64x2(a).as_i64x4();
        transmute(simd_select_bitmask(k, b, src.as_i64x4()))
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i64x2&ig_expand=553)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let b = _mm256_broadcast_i64x2(a).as_i64x4();
        transmute(simd_select_bitmask(k, b, i64x4::ZERO))
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x2&ig_expand=554)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_i64x2(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i64x2();
        let b: i64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x2&ig_expand=555)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcast_i64x2(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i64x2(a).as_i64x8();
        transmute(simd_select_bitmask(k, b, src.as_i64x8()))
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x2&ig_expand=556)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i64x2(a).as_i64x8();
        transmute(simd_select_bitmask(k, b, i64x8::ZERO))
    }
}

// Extract

/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x8_ps&ig_expand=2946)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extractf32x8_ps<const IMM8: i32>(a: __m512) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        match IMM8 & 1 {
            0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        }
    }
}
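
// Illustrative sketch (an assumed helper, for exposition only): IMM8 selects which 256-bit
// half of `a` is returned; `1` picks the upper eight single-precision lanes.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512dq")]
unsafe fn _demo_mm512_extractf32x8_ps() {
    let a = _mm512_setr_ps(
        0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
    );
    let hi = _mm512_extractf32x8_ps::<1>(a); // lanes 8..=15
    let mut out = [0.0f32; 8];
    unsafe { _mm256_storeu_ps(out.as_mut_ptr(), hi) };
    assert_eq!(out, [8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]);
}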

/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
/// if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x8_ps&ig_expand=2947)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_extractf32x8_ps<const IMM8: i32>(
    src: __m256,
    k: __mmask8,
    a: __m512,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b: __m256 = _mm512_extractf32x8_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, b.as_f32x8(), src.as_f32x8()))
    }
}

/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x8_ps&ig_expand=2948)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_extractf32x8_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b: __m256 = _mm512_extractf32x8_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, b.as_f32x8(), f32x8::ZERO))
    }
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf64x2_pd&ig_expand=2949)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extractf64x2_pd<const IMM8: i32>(a: __m256d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        match IMM8 & 1 {
            0 => simd_shuffle!(a, a, [0, 1]),
            _ => simd_shuffle!(a, a, [2, 3]),
        }
    }
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
/// if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf64x2_pd&ig_expand=2950)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_extractf64x2_pd<const IMM8: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m256d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b: __m128d = _mm256_extractf64x2_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, b.as_f64x2(), src.as_f64x2()))
    }
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf64x2_pd&ig_expand=2951)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b: __m128d = _mm256_extractf64x2_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, b.as_f64x2(), f64x2::ZERO))
    }
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x2_pd&ig_expand=2952)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extractf64x2_pd<const IMM8: i32>(a: __m512d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        match IMM8 & 3 {
            0 => simd_shuffle!(a, a, [0, 1]),
            1 => simd_shuffle!(a, a, [2, 3]),
            2 => simd_shuffle!(a, a, [4, 5]),
            _ => simd_shuffle!(a, a, [6, 7]),
        }
    }
}
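
// Illustrative sketch (an assumed helper, for exposition only): IMM8 selects one of the four
// 128-bit chunks of `a`; `2` returns lanes 4 and 5.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512dq")]
unsafe fn _demo_mm512_extractf64x2_pd() {
    let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
    let r = _mm512_extractf64x2_pd::<2>(a); // lanes: [4.0, 5.0]
    let mut out = [0.0f64; 2];
    unsafe { _mm_storeu_pd(out.as_mut_ptr(), r) };
    assert_eq!(out, [4.0, 5.0]);
}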

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
/// if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x2_pd&ig_expand=2953)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_extractf64x2_pd<const IMM8: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m512d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, src.as_f64x2()))
    }
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x2_pd&ig_expand=2954)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, f64x2::ZERO))
    }
}

/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
/// the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x8_epi32&ig_expand=2965)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extracti32x8_epi32<const IMM8: i32>(a: __m512i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i32x16();
        let b: i32x8 = match IMM8 & 1 {
            0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        };
        transmute(b)
    }
}

/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x8_epi32&ig_expand=2966)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_extracti32x8_epi32<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8();
        transmute(simd_select_bitmask(k, b, src.as_i32x8()))
    }
}

/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x8_epi32&ig_expand=2967)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_extracti32x8_epi32<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8();
        transmute(simd_select_bitmask(k, b, i32x8::ZERO))
    }
}

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti64x2_epi64&ig_expand=2968)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extracti64x2_epi64<const IMM8: i32>(a: __m256i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i64x4();
        match IMM8 & 1 {
            0 => simd_shuffle!(a, a, [0, 1]),
            _ => simd_shuffle!(a, a, [2, 3]),
        }
    }
}
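
// Illustrative sketch (an assumed helper, for exposition only): IMM8 = 1 returns the upper
// 128-bit half, i.e. the third and fourth 64-bit lanes of `a`.
#[cfg(test)]
#[target_feature(enable = "avx512dq,avx512vl")]
unsafe fn _demo_mm256_extracti64x2_epi64() {
    let a = _mm256_setr_epi64x(10, 20, 30, 40);
    let r = _mm256_extracti64x2_epi64::<1>(a); // lanes: [30, 40]
    let mut out = [0i64; 2];
    unsafe { _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r) };
    assert_eq!(out, [30, 40]);
}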

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti64x2_epi64&ig_expand=2969)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_extracti64x2_epi64<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m256i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2();
        transmute(simd_select_bitmask(k, b, src.as_i64x2()))
    }
}

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti64x2_epi64&ig_expand=2970)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2();
        transmute(simd_select_bitmask(k, b, i64x2::ZERO))
    }
}

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x2_epi64&ig_expand=2971)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extracti64x2_epi64<const IMM8: i32>(a: __m512i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let a = a.as_i64x8();
        match IMM8 & 3 {
            0 => simd_shuffle!(a, a, [0, 1]),
            1 => simd_shuffle!(a, a, [2, 3]),
            2 => simd_shuffle!(a, a, [4, 5]),
            _ => simd_shuffle!(a, a, [6, 7]),
        }
    }
}

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x2_epi64&ig_expand=2972)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_extracti64x2_epi64<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m512i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2();
        transmute(simd_select_bitmask(k, b, src.as_i64x2()))
    }
}

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x2_epi64&ig_expand=2973)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2();
        transmute(simd_select_bitmask(k, b, i64x2::ZERO))
    }
}

// Insert

/// Copy a to dst, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
/// elements) from b into dst at the location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x8&ig_expand=3850)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_insertf32x8<const IMM8: i32>(a: __m512, b: __m256) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b: __m512 = _mm512_castps256_ps512(b);
        match IMM8 & 1 {
            0 => {
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15]
                )
            }
            _ => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
                )
            }
        }
    }
}
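
// Illustrative sketch (an assumed helper, for exposition only): IMM8 = 1 overwrites the upper
// 256 bits of `a` with `b` while the lower 256 bits are copied through unchanged.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512dq")]
unsafe fn _demo_mm512_insertf32x8() {
    let a = _mm512_setzero_ps();
    let b = _mm256_set1_ps(1.0);
    let r = _mm512_insertf32x8::<1>(a, b);
    let mut out = [0.0f32; 16];
    unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
    assert_eq!(out[..8], [0.0f32; 8]);
    assert_eq!(out[8..], [1.0f32; 8]);
}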

/// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x8&ig_expand=3851)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_insertf32x8<const IMM8: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m256,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c: __m512 = _mm512_insertf32x8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_f32x16(), src.as_f32x16()))
    }
}

/// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x8&ig_expand=3852)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_insertf32x8<const IMM8: i32>(
    k: __mmask16,
    a: __m512,
    b: __m256,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm512_insertf32x8::<IMM8>(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, c, f32x16::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into dst at the location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf64x2&ig_expand=3853)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_insertf64x2<const IMM8: i32>(a: __m256d, b: __m128d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b: __m256d = _mm256_castpd128_pd256(b);
        match IMM8 & 1 {
            0 => simd_shuffle!(a, b, [4, 5, 2, 3]),
            _ => simd_shuffle!(a, b, [0, 1, 4, 5]),
        }
    }
}
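
// Illustrative sketch (an assumed helper, for exposition only): IMM8 = 0 replaces the lower
// 128 bits of `a` with `b`; the upper two lanes of `a` are preserved.
#[cfg(test)]
#[target_feature(enable = "avx512dq,avx512vl")]
unsafe fn _demo_mm256_insertf64x2() {
    let a = _mm256_set1_pd(7.0);
    let b = _mm_set_pd(2.0, 1.0); // lanes: [1.0, 2.0]
    let r = _mm256_insertf64x2::<0>(a, b); // lanes: [1.0, 2.0, 7.0, 7.0]
    let mut out = [0.0f64; 4];
    unsafe { _mm256_storeu_pd(out.as_mut_ptr(), r) };
    assert_eq!(out, [1.0, 2.0, 7.0, 7.0]);
}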

/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf64x2&ig_expand=3854)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_insertf64x2<const IMM8: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m256d,
    b: __m128d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c: __m256d = _mm256_insertf64x2::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_f64x4(), src.as_f64x4()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf64x2&ig_expand=3855)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_insertf64x2<const IMM8: i32>(
    k: __mmask8,
    a: __m256d,
    b: __m128d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm256_insertf64x2::<IMM8>(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, c, f64x4::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into dst at the location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x2&ig_expand=3856)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_insertf64x2<const IMM8: i32>(a: __m512d, b: __m128d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b: __m512d = _mm512_castpd128_pd512(b);
        match IMM8 & 3 {
            0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]),
            1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]),
            2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]),
        }
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x2&ig_expand=3857)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_insertf64x2<const IMM8: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m128d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let c: __m512d = _mm512_insertf64x2::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_f64x8(), src.as_f64x8()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x2&ig_expand=3858)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_insertf64x2<const IMM8: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m128d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let c = _mm512_insertf64x2::<IMM8>(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, c, f64x8::ZERO))
    }
}

/// Copy a to dst, then insert 256 bits (composed of 8 packed 32-bit integers) from b into dst at the
/// location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x8&ig_expand=3869)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_inserti32x8<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i32x16();
        let b = _mm512_castsi256_si512(b).as_i32x16();
        let r: i32x16 = match IMM8 & 1 {
            0 => {
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15]
                )
            }
            _ => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
                )
            }
        };
        transmute(r)
    }
}

/// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
/// the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x8&ig_expand=3870)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_inserti32x8<const IMM8: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m256i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c: __m512i = _mm512_inserti32x8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_i32x16(), src.as_i32x16()))
    }
}

/// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x8&ig_expand=3871)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_inserti32x8<const IMM8: i32>(
    k: __mmask16,
    a: __m512i,
    b: __m256i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm512_inserti32x8::<IMM8>(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, c, i32x16::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the
/// location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti64x2&ig_expand=3872)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_inserti64x2<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i64x4();
        let b = _mm256_castsi128_si256(b).as_i64x4();
        match IMM8 & 1 {
            0 => simd_shuffle!(a, b, [4, 5, 2, 3]),
            _ => simd_shuffle!(a, b, [0, 1, 4, 5]),
        }
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
/// the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti64x2&ig_expand=3873)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_inserti64x2<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m128i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c: __m256i = _mm256_inserti64x2::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_i64x4(), src.as_i64x4()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti64x2&ig_expand=3874)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_inserti64x2<const IMM8: i32>(
    k: __mmask8,
    a: __m256i,
    b: __m128i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm256_inserti64x2::<IMM8>(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, c, i64x4::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the
/// location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x2&ig_expand=3875)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_inserti64x2<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let a = a.as_i64x8();
        let b = _mm512_castsi128_si512(b).as_i64x8();
        match IMM8 & 3 {
            0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]),
            1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]),
            2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]),
        }
    }
}
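
// Illustrative sketch (an assumed helper, for exposition only): IMM8 = 3 places `b` in the
// top 128-bit chunk (lanes 6 and 7); the extract intrinsic reads the same chunk back.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512dq")]
unsafe fn _demo_mm512_inserti64x2() {
    let a = _mm512_set1_epi64(0);
    let b = _mm_set_epi64x(2, 1); // lanes: [1, 2]
    let r = _mm512_inserti64x2::<3>(a, b);
    let hi = _mm512_extracti64x2_epi64::<3>(r);
    let mut out = [0i64; 2];
    unsafe { _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, hi) };
    assert_eq!(out, [1, 2]);
}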

/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
/// the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x2&ig_expand=3876)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_inserti64x2<const IMM8: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m128i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let c: __m512i = _mm512_inserti64x2::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_i64x8(), src.as_i64x8()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x2&ig_expand=3877)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_inserti64x2<const IMM8: i32>(
    k: __mmask8,
    a: __m512i,
    b: __m128i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let c = _mm512_inserti64x2::<IMM8>(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, c, i64x8::ZERO))
    }
}

// Convert

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi64_pd&ig_expand=1437)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundepi64_pd<const ROUNDING: i32>(a: __m512i) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtqq2pd_512(a.as_i64x8(), ROUNDING))
    }
}
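
// Illustrative sketch (an assumed helper, for exposition only): with round-to-nearest and
// exceptions suppressed, each signed 64-bit lane becomes the corresponding double.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512dq")]
unsafe fn _demo_mm512_cvt_roundepi64_pd() {
    let a = _mm512_setr_epi64(0, 1, 2, 3, -1, -2, -3, 4);
    let r = _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    let mut out = [0.0f64; 8];
    unsafe { _mm512_storeu_pd(out.as_mut_ptr(), r) };
    assert_eq!(out, [0.0, 1.0, 2.0, 3.0, -1.0, -2.0, -3.0, 4.0]);
}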
2268
2269/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2270/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2271/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
2272///
2273/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2274/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2275/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2276/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2277/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2278///
2279/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi64_pd&ig_expand=1438)
2280#[inline]
2281#[target_feature(enable = "avx512dq")]
2282#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
2283#[rustc_legacy_const_generics(3)]
2284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2285pub fn _mm512_mask_cvt_roundepi64_pd<const ROUNDING: i32>(
2286 src: __m512d,
2287 k: __mmask8,
2288 a: __m512i,
2289) -> __m512d {
2290 unsafe {
2291 static_assert_rounding!(ROUNDING);
2292 let b: Simd = _mm512_cvt_roundepi64_pd::<ROUNDING>(a).as_f64x8();
2293 transmute(src:simd_select_bitmask(m:k, yes:b, no:src.as_f64x8()))
2294 }
2295}
2296
2297/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2298/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2299/// Rounding is done according to the ROUNDING parameter, which can be one of:
2300///
2301/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2302/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2303/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2304/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2305/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2306///
2307/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi64_pd&ig_expand=1439)
2308#[inline]
2309#[target_feature(enable = "avx512dq")]
2310#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
2311#[rustc_legacy_const_generics(2)]
2312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2313pub fn _mm512_maskz_cvt_roundepi64_pd<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m512d {
2314 unsafe {
2315 static_assert_rounding!(ROUNDING);
2316 let b: Simd = _mm512_cvt_roundepi64_pd::<ROUNDING>(a).as_f64x8();
2317 transmute(src:simd_select_bitmask(m:k, yes:b, no:f64x8::ZERO))
2318 }
2319}
2320
2321/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2322/// and store the results in dst.
2323///
2324/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_pd&ig_expand=1705)
2325#[inline]
2326#[target_feature(enable = "avx512dq,avx512vl")]
2327#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2329pub fn _mm_cvtepi64_pd(a: __m128i) -> __m128d {
    unsafe { transmute(vcvtqq2pd_128(a.as_i64x2(), _MM_FROUND_CUR_DIRECTION)) }
2331}
2332
2333/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2334/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2335/// not set).
2336///
2337/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_pd&ig_expand=1706)
2338#[inline]
2339#[target_feature(enable = "avx512dq,avx512vl")]
2340#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2341#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2342pub fn _mm_mask_cvtepi64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
2343 unsafe {
        let b = _mm_cvtepi64_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, src.as_f64x2()))
2346 }
2347}
2348
2349/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2350/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2351///
2352/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_pd&ig_expand=1707)
2353#[inline]
2354#[target_feature(enable = "avx512dq,avx512vl")]
2355#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2356#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2357pub fn _mm_maskz_cvtepi64_pd(k: __mmask8, a: __m128i) -> __m128d {
2358 unsafe {
        let b = _mm_cvtepi64_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, f64x2::ZERO))
2361 }
2362}
2363
2364/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2365/// and store the results in dst.
2366///
2367/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_pd&ig_expand=1708)
2368#[inline]
2369#[target_feature(enable = "avx512dq,avx512vl")]
2370#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2372pub fn _mm256_cvtepi64_pd(a: __m256i) -> __m256d {
    unsafe { transmute(vcvtqq2pd_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) }
2374}
2375
2376/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2377/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2378/// not set).
2379///
2380/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_pd&ig_expand=1709)
2381#[inline]
2382#[target_feature(enable = "avx512dq,avx512vl")]
2383#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2385pub fn _mm256_mask_cvtepi64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d {
2386 unsafe {
        let b = _mm256_cvtepi64_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, src.as_f64x4()))
2389 }
2390}
2391
2392/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2393/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2394///
2395/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_pd&ig_expand=1710)
2396#[inline]
2397#[target_feature(enable = "avx512dq,avx512vl")]
2398#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2400pub fn _mm256_maskz_cvtepi64_pd(k: __mmask8, a: __m256i) -> __m256d {
2401 unsafe {
        let b = _mm256_cvtepi64_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, f64x4::ZERO))
2404 }
2405}
2406
2407/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2408/// and store the results in dst.
2409///
2410/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_pd&ig_expand=1711)
2411#[inline]
2412#[target_feature(enable = "avx512dq")]
2413#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2415pub fn _mm512_cvtepi64_pd(a: __m512i) -> __m512d {
    unsafe { transmute(vcvtqq2pd_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) }
2417}
2418
2419/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2420/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2421/// not set).
2422///
2423/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_pd&ig_expand=1712)
2424#[inline]
2425#[target_feature(enable = "avx512dq")]
2426#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2428pub fn _mm512_mask_cvtepi64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d {
2429 unsafe {
        let b = _mm512_cvtepi64_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
2432 }
2433}
2434
2435/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2436/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2437///
2438/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_pd&ig_expand=1713)
2439#[inline]
2440#[target_feature(enable = "avx512dq")]
2441#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2443pub fn _mm512_maskz_cvtepi64_pd(k: __mmask8, a: __m512i) -> __m512d {
2444 unsafe {
        let b = _mm512_cvtepi64_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
2447 }
2448}
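
// Illustrative sketch (hypothetical helper, arbitrary values): contrasts the
// write-masked and zero-masked forms of the signed 64-bit to f64 conversion.
// Lanes whose mask bit is clear either keep `src` or become 0.0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512dq")]
fn example_cvtepi64_pd_masking() {
    unsafe {
        let a = _mm512_set_epi64(80, 70, 60, 50, 40, 30, 20, 10);
        let src = _mm512_set1_pd(-1.0);
        let k = 0b0000_1111;
        // Write-masked: lanes 4..8 keep the corresponding lanes of `src`.
        let masked: [f64; 8] = transmute(_mm512_mask_cvtepi64_pd(src, k, a));
        assert_eq!(masked, [10.0, 20.0, 30.0, 40.0, -1.0, -1.0, -1.0, -1.0]);
        // Zero-masked: lanes 4..8 are zeroed instead.
        let zeroed: [f64; 8] = transmute(_mm512_maskz_cvtepi64_pd(k, a));
        assert_eq!(zeroed, [10.0, 20.0, 30.0, 40.0, 0.0, 0.0, 0.0, 0.0]);
    }
}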
2449
2450/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2451/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
2452///
2453/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2454/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2455/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2456/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2457/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2458///
2459/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi64_ps&ig_expand=1443)
2460#[inline]
2461#[target_feature(enable = "avx512dq")]
2462#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
2463#[rustc_legacy_const_generics(1)]
2464#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2465pub fn _mm512_cvt_roundepi64_ps<const ROUNDING: i32>(a: __m512i) -> __m256 {
2466 unsafe {
2467 static_assert_rounding!(ROUNDING);
        transmute(vcvtqq2ps_512(a.as_i64x8(), ROUNDING))
2469 }
2470}
2471
2472/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2473/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2474/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
2475///
2476/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2477/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2478/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2479/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2480/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2481///
2482/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi64_ps&ig_expand=1444)
2483#[inline]
2484#[target_feature(enable = "avx512dq")]
2485#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
2486#[rustc_legacy_const_generics(3)]
2487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2488pub fn _mm512_mask_cvt_roundepi64_ps<const ROUNDING: i32>(
2489 src: __m256,
2490 k: __mmask8,
2491 a: __m512i,
2492) -> __m256 {
2493 unsafe {
2494 static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepi64_ps::<ROUNDING>(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
2497 }
2498}
2499
2500/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2501/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2502/// Rounding is done according to the ROUNDING parameter, which can be one of:
2503///
2504/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2505/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2506/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2507/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2508/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2509///
2510/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi64_ps&ig_expand=1445)
2511#[inline]
2512#[target_feature(enable = "avx512dq")]
2513#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
2514#[rustc_legacy_const_generics(2)]
2515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2516pub fn _mm512_maskz_cvt_roundepi64_ps<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m256 {
2517 unsafe {
2518 static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepi64_ps::<ROUNDING>(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
2521 }
2522}
2523
2524/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2525/// and store the results in dst.
2526///
2527/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_ps&ig_expand=1723)
2528#[inline]
2529#[target_feature(enable = "avx512dq,avx512vl")]
2530#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2532pub fn _mm_cvtepi64_ps(a: __m128i) -> __m128 {
    _mm_mask_cvtepi64_ps(_mm_undefined_ps(), 0xff, a)
2534}
2535
2536/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2537/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2538/// not set).
2539///
2540/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_ps&ig_expand=1724)
2541#[inline]
2542#[target_feature(enable = "avx512dq,avx512vl")]
2543#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2545pub fn _mm_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
    unsafe { transmute(vcvtqq2ps_128(a.as_i64x2(), src.as_f32x4(), k)) }
2547}
2548
2549/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2550/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2551///
2552/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_ps&ig_expand=1725)
2553#[inline]
2554#[target_feature(enable = "avx512dq,avx512vl")]
2555#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2557pub fn _mm_maskz_cvtepi64_ps(k: __mmask8, a: __m128i) -> __m128 {
    _mm_mask_cvtepi64_ps(_mm_setzero_ps(), k, a)
2559}
2560
2561/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2562/// and store the results in dst.
2563///
2564/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_ps&ig_expand=1726)
2565#[inline]
2566#[target_feature(enable = "avx512dq,avx512vl")]
2567#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2569pub fn _mm256_cvtepi64_ps(a: __m256i) -> __m128 {
    unsafe { transmute(vcvtqq2ps_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) }
2571}
2572
2573/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2574/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2575/// not set).
2576///
2577/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_ps&ig_expand=1727)
2578#[inline]
2579#[target_feature(enable = "avx512dq,avx512vl")]
2580#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2582pub fn _mm256_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 {
2583 unsafe {
        let b = _mm256_cvtepi64_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, b, src.as_f32x4()))
2586 }
2587}
2588
2589/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2590/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2591///
2592/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_ps&ig_expand=1728)
2593#[inline]
2594#[target_feature(enable = "avx512dq,avx512vl")]
2595#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2597pub fn _mm256_maskz_cvtepi64_ps(k: __mmask8, a: __m256i) -> __m128 {
2598 unsafe {
        let b = _mm256_cvtepi64_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, b, f32x4::ZERO))
2601 }
2602}
2603
2604/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2605/// and store the results in dst.
2606///
2607/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_ps&ig_expand=1729)
2608#[inline]
2609#[target_feature(enable = "avx512dq")]
2610#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2611#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2612pub fn _mm512_cvtepi64_ps(a: __m512i) -> __m256 {
    unsafe { transmute(vcvtqq2ps_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) }
2614}
2615
2616/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2617/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2618/// not set).
2619///
2620/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_ps&ig_expand=1730)
2621#[inline]
2622#[target_feature(enable = "avx512dq")]
2623#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2624#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2625pub fn _mm512_mask_cvtepi64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 {
2626 unsafe {
        let b = _mm512_cvtepi64_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
2629 }
2630}
2631
2632/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2633/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2634///
2635/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_ps&ig_expand=1731)
2636#[inline]
2637#[target_feature(enable = "avx512dq")]
2638#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2640pub fn _mm512_maskz_cvtepi64_ps(k: __mmask8, a: __m512i) -> __m256 {
2641 unsafe {
        let b = _mm512_cvtepi64_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
2644 }
2645}
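
// Illustrative sketch (hypothetical helper, arbitrary values): eight signed
// 64-bit lanes narrow to eight f32 lanes, so a 512-bit integer input produces
// a 256-bit float result. The values are chosen so every conversion is exact.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512dq")]
fn example_cvtepi64_ps() {
    unsafe {
        let a = _mm512_set_epi64(-4, -3, -2, -1, 3, 2, 1, 0);
        let r: [f32; 8] = transmute(_mm512_cvtepi64_ps(a));
        assert_eq!(r, [0.0, 1.0, 2.0, 3.0, -1.0, -2.0, -3.0, -4.0]);
    }
}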
2646
2647/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2648/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
2649///
2650/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2651/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2652/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2653/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2654/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2655///
2656/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu64_pd&ig_expand=1455)
2657#[inline]
2658#[target_feature(enable = "avx512dq")]
2659#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
2660#[rustc_legacy_const_generics(1)]
2661#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2662pub fn _mm512_cvt_roundepu64_pd<const ROUNDING: i32>(a: __m512i) -> __m512d {
2663 unsafe {
2664 static_assert_rounding!(ROUNDING);
        transmute(vcvtuqq2pd_512(a.as_u64x8(), ROUNDING))
2666 }
2667}
2668
2669/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2670/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2671/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
2672///
2673/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2674/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2675/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2676/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2677/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2678///
2679/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu64_pd&ig_expand=1456)
2680#[inline]
2681#[target_feature(enable = "avx512dq")]
2682#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
2683#[rustc_legacy_const_generics(3)]
2684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2685pub fn _mm512_mask_cvt_roundepu64_pd<const ROUNDING: i32>(
2686 src: __m512d,
2687 k: __mmask8,
2688 a: __m512i,
2689) -> __m512d {
2690 unsafe {
2691 static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepu64_pd::<ROUNDING>(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
2694 }
2695}
2696
2697/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2698/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2699/// Rounding is done according to the ROUNDING parameter, which can be one of:
2700///
2701/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2702/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2703/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2704/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2705/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2706///
2707/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu64_pd&ig_expand=1457)
2708#[inline]
2709#[target_feature(enable = "avx512dq")]
2710#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
2711#[rustc_legacy_const_generics(2)]
2712#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2713pub fn _mm512_maskz_cvt_roundepu64_pd<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m512d {
2714 unsafe {
2715 static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepu64_pd::<ROUNDING>(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
2718 }
2719}
2720
2721/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2722/// and store the results in dst.
2723///
2724/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu64_pd&ig_expand=1827)
2725#[inline]
2726#[target_feature(enable = "avx512dq,avx512vl")]
2727#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2729pub fn _mm_cvtepu64_pd(a: __m128i) -> __m128d {
    unsafe { transmute(vcvtuqq2pd_128(a.as_u64x2(), _MM_FROUND_CUR_DIRECTION)) }
2731}
2732
2733/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2734/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2735/// not set).
2736///
2737/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu64_pd&ig_expand=1828)
2738#[inline]
2739#[target_feature(enable = "avx512dq,avx512vl")]
2740#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2742pub fn _mm_mask_cvtepu64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
2743 unsafe {
        let b = _mm_cvtepu64_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, src.as_f64x2()))
2746 }
2747}
2748
2749/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2750/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2751///
2752/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu64_pd&ig_expand=1829)
2753#[inline]
2754#[target_feature(enable = "avx512dq,avx512vl")]
2755#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2757pub fn _mm_maskz_cvtepu64_pd(k: __mmask8, a: __m128i) -> __m128d {
2758 unsafe {
        let b = _mm_cvtepu64_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, f64x2::ZERO))
2761 }
2762}
2763
2764/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2765/// and store the results in dst.
2766///
2767/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu64_pd&ig_expand=1830)
2768#[inline]
2769#[target_feature(enable = "avx512dq,avx512vl")]
2770#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2771#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2772pub fn _mm256_cvtepu64_pd(a: __m256i) -> __m256d {
    unsafe { transmute(vcvtuqq2pd_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) }
2774}
2775
2776/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2777/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2778/// not set).
2779///
2780/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu64_pd&ig_expand=1831)
2781#[inline]
2782#[target_feature(enable = "avx512dq,avx512vl")]
2783#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2785pub fn _mm256_mask_cvtepu64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d {
2786 unsafe {
        let b = _mm256_cvtepu64_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, src.as_f64x4()))
2789 }
2790}
2791
2792/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2793/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2794///
2795/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu64_pd&ig_expand=1832)
2796#[inline]
2797#[target_feature(enable = "avx512dq,avx512vl")]
2798#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2800pub fn _mm256_maskz_cvtepu64_pd(k: __mmask8, a: __m256i) -> __m256d {
2801 unsafe {
        let b = _mm256_cvtepu64_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, f64x4::ZERO))
2804 }
2805}
2806
2807/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2808/// and store the results in dst.
2809///
2810/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu64_pd&ig_expand=1833)
2811#[inline]
2812#[target_feature(enable = "avx512dq")]
2813#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2815pub fn _mm512_cvtepu64_pd(a: __m512i) -> __m512d {
    unsafe { transmute(vcvtuqq2pd_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) }
2817}
2818
2819/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2820/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2821/// not set).
2822///
2823/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu64_pd&ig_expand=1834)
2824#[inline]
2825#[target_feature(enable = "avx512dq")]
2826#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2828pub fn _mm512_mask_cvtepu64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d {
2829 unsafe {
        let b = _mm512_cvtepu64_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
2832 }
2833}
2834
2835/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2836/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2837///
2838/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu64_pd&ig_expand=1835)
2839#[inline]
2840#[target_feature(enable = "avx512dq")]
2841#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2843pub fn _mm512_maskz_cvtepu64_pd(k: __mmask8, a: __m512i) -> __m512d {
2844 unsafe {
        let b = _mm512_cvtepu64_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
2847 }
2848}
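
// Illustrative sketch (hypothetical helper, arbitrary values): the epu64 form
// treats each lane as unsigned, so a bit pattern that is -1 as i64 converts to
// a large positive double rather than to -1.0. The unsigned result shown
// assumes the default MXCSR rounding mode (round to nearest).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512dq")]
fn example_cvtepu64_pd_vs_cvtepi64_pd() {
    unsafe {
        // Every lane holds the bit pattern of -1i64, i.e. u64::MAX.
        let a = _mm512_set1_epi64(-1);
        let signed: [f64; 8] = transmute(_mm512_cvtepi64_pd(a));
        let unsigned: [f64; 8] = transmute(_mm512_cvtepu64_pd(a));
        assert_eq!(signed[0], -1.0);
        // u64::MAX is not exactly representable as f64 and rounds to 2^64.
        assert_eq!(unsigned[0], u64::MAX as f64);
    }
}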
2849
2850/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2851/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
2852///
2853/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2854/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2855/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2856/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2857/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2858///
2859/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu64_ps&ig_expand=1461)
2860#[inline]
2861#[target_feature(enable = "avx512dq")]
2862#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
2863#[rustc_legacy_const_generics(1)]
2864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2865pub fn _mm512_cvt_roundepu64_ps<const ROUNDING: i32>(a: __m512i) -> __m256 {
2866 unsafe {
2867 static_assert_rounding!(ROUNDING);
        transmute(vcvtuqq2ps_512(a.as_u64x8(), ROUNDING))
2869 }
2870}
2871
2872/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2873/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2874/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
2875///
2876/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2877/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2878/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2879/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2880/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2881///
2882/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu64_ps&ig_expand=1462)
2883#[inline]
2884#[target_feature(enable = "avx512dq")]
2885#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
2886#[rustc_legacy_const_generics(3)]
2887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2888pub fn _mm512_mask_cvt_roundepu64_ps<const ROUNDING: i32>(
2889 src: __m256,
2890 k: __mmask8,
2891 a: __m512i,
2892) -> __m256 {
2893 unsafe {
2894 static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepu64_ps::<ROUNDING>(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
2897 }
2898}
2899
2900/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2901/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2902/// Rounding is done according to the ROUNDING parameter, which can be one of:
2903///
2904/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2905/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2906/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2907/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2908/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2909///
2910/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu64_ps&ig_expand=1463)
2911#[inline]
2912#[target_feature(enable = "avx512dq")]
2913#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
2914#[rustc_legacy_const_generics(2)]
2915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2916pub fn _mm512_maskz_cvt_roundepu64_ps<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m256 {
2917 unsafe {
2918 static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepu64_ps::<ROUNDING>(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
2921 }
2922}
2923
2924/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2925/// and store the results in dst.
2926///
2927/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu64_ps&ig_expand=1845)
2928#[inline]
2929#[target_feature(enable = "avx512dq,avx512vl")]
2930#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2931#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2932pub fn _mm_cvtepu64_ps(a: __m128i) -> __m128 {
    _mm_mask_cvtepu64_ps(_mm_undefined_ps(), 0xff, a)
2934}
2935
2936/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2937/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2938/// not set).
2939///
2940/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu64_ps&ig_expand=1846)
2941#[inline]
2942#[target_feature(enable = "avx512dq,avx512vl")]
2943#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2944#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2945pub fn _mm_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
    unsafe { transmute(vcvtuqq2ps_128(a.as_u64x2(), src.as_f32x4(), k)) }
2947}
2948
2949/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2950/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2951///
2952/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu64_ps&ig_expand=1847)
2953#[inline]
2954#[target_feature(enable = "avx512dq,avx512vl")]
2955#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2957pub fn _mm_maskz_cvtepu64_ps(k: __mmask8, a: __m128i) -> __m128 {
    _mm_mask_cvtepu64_ps(_mm_setzero_ps(), k, a)
2959}
2960
2961/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2962/// and store the results in dst.
2963///
2964/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu64_ps&ig_expand=1848)
2965#[inline]
2966#[target_feature(enable = "avx512dq,avx512vl")]
2967#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2969pub fn _mm256_cvtepu64_ps(a: __m256i) -> __m128 {
    unsafe { transmute(vcvtuqq2ps_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) }
2971}
2972
2973/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2974/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2975/// not set).
2976///
2977/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu64_ps&ig_expand=1849)
2978#[inline]
2979#[target_feature(enable = "avx512dq,avx512vl")]
2980#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2982pub fn _mm256_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 {
2983 unsafe {
        let b = _mm256_cvtepu64_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, b, src.as_f32x4()))
2986 }
2987}
2988
2989/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2990/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2991///
2992/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu64_ps&ig_expand=1850)
2993#[inline]
2994#[target_feature(enable = "avx512dq,avx512vl")]
2995#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2997pub fn _mm256_maskz_cvtepu64_ps(k: __mmask8, a: __m256i) -> __m128 {
2998 unsafe {
        let b = _mm256_cvtepu64_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, b, f32x4::ZERO))
3001 }
3002}
3003
3004/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
3005/// and store the results in dst.
3006///
3007/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu64_ps&ig_expand=1851)
3008#[inline]
3009#[target_feature(enable = "avx512dq")]
3010#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
3011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3012pub fn _mm512_cvtepu64_ps(a: __m512i) -> __m256 {
    unsafe { transmute(vcvtuqq2ps_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) }
3014}
3015
3016/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
3017/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3018/// not set).
3019///
3020/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu64_ps&ig_expand=1852)
3021#[inline]
3022#[target_feature(enable = "avx512dq")]
3023#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
3024#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3025pub fn _mm512_mask_cvtepu64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 {
3026 unsafe {
        let b = _mm512_cvtepu64_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
3029 }
3030}
3031
3032/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
3033/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3034///
3035/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu64_ps&ig_expand=1853)
3036#[inline]
3037#[target_feature(enable = "avx512dq")]
3038#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
3039#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3040pub fn _mm512_maskz_cvtepu64_ps(k: __mmask8, a: __m512i) -> __m256 {
3041 unsafe {
        let b = _mm512_cvtepu64_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
3044 }
3045}
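
// Illustrative sketch (hypothetical helper, arbitrary values): unsigned 64-bit
// lanes narrow to f32. The top lane holds 2^63, which would be negative when
// reinterpreted as i64 but converts to a large positive float here.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512dq")]
fn example_cvtepu64_ps() {
    unsafe {
        let a = _mm512_set_epi64(i64::MIN, 6, 5, 4, 3, 2, 1, 0);
        let r: [f32; 8] = transmute(_mm512_cvtepu64_ps(a));
        let expected = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, (1u64 << 63) as f32];
        assert_eq!(r, expected);
    }
}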
3046
3047/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3048/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
3049///
3050/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3051/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3052/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3053/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3054/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3055///
3056/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi64&ig_expand=1472)
3057#[inline]
3058#[target_feature(enable = "avx512dq")]
3059#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
3060#[rustc_legacy_const_generics(1)]
3061#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3062pub fn _mm512_cvt_roundpd_epi64<const ROUNDING: i32>(a: __m512d) -> __m512i {
3063 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
3065}
3066
3067/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3068/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3069/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
3070///
3071/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3072/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3073/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3074/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3075/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3076///
3077/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi64&ig_expand=1473)
3078#[inline]
3079#[target_feature(enable = "avx512dq")]
3080#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
3081#[rustc_legacy_const_generics(3)]
3082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3083pub fn _mm512_mask_cvt_roundpd_epi64<const ROUNDING: i32>(
3084 src: __m512i,
3085 k: __mmask8,
3086 a: __m512d,
3087) -> __m512i {
3088 unsafe {
3089 static_assert_rounding!(ROUNDING);
        transmute(vcvtpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, ROUNDING))
3091 }
3092}
3093
3094/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3095/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3096/// Rounding is done according to the ROUNDING parameter, which can be one of:
3097///
3098/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3099/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3100/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3101/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3102/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3103///
3104/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi64&ig_expand=1474)
3105#[inline]
3106#[target_feature(enable = "avx512dq")]
3107#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
3108#[rustc_legacy_const_generics(2)]
3109#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3110pub fn _mm512_maskz_cvt_roundpd_epi64<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512i {
3111 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
3113}
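
// Illustrative sketch (hypothetical helper, arbitrary values): the same input
// converted with two different ROUNDING constants. Under round-to-nearest-even
// 2.5 becomes 2, while rounding toward positive infinity yields 3.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512dq")]
fn example_cvt_roundpd_epi64() {
    unsafe {
        const NEAREST: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
        const UP: i32 = _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
        let a = _mm512_set1_pd(2.5);
        let nearest: [i64; 8] = transmute(_mm512_cvt_roundpd_epi64::<NEAREST>(a));
        let up: [i64; 8] = transmute(_mm512_cvt_roundpd_epi64::<UP>(a));
        assert_eq!(nearest[0], 2);
        assert_eq!(up[0], 3);
    }
}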
3114
3115/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3116/// and store the results in dst.
3117///
3118/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi64&ig_expand=1941)
3119#[inline]
3120#[target_feature(enable = "avx512dq,avx512vl")]
3121#[cfg_attr(test, assert_instr(vcvtpd2qq))]
3122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3123pub fn _mm_cvtpd_epi64(a: __m128d) -> __m128i {
    _mm_mask_cvtpd_epi64(_mm_undefined_si128(), 0xff, a)
3125}
3126
3127/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3128/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3129/// not set).
3130///
3131/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi64&ig_expand=1942)
3132#[inline]
3133#[target_feature(enable = "avx512dq,avx512vl")]
3134#[cfg_attr(test, assert_instr(vcvtpd2qq))]
3135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3136pub fn _mm_mask_cvtpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvtpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) }
3138}
3139
3140/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3141/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3142///
3143/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi64&ig_expand=1943)
3144#[inline]
3145#[target_feature(enable = "avx512dq,avx512vl")]
3146#[cfg_attr(test, assert_instr(vcvtpd2qq))]
3147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3148pub fn _mm_maskz_cvtpd_epi64(k: __mmask8, a: __m128d) -> __m128i {
    _mm_mask_cvtpd_epi64(_mm_setzero_si128(), k, a)
3150}
3151
3152/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3153/// and store the results in dst.
3154///
3155/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epi64&ig_expand=1944)
3156#[inline]
3157#[target_feature(enable = "avx512dq,avx512vl")]
3158#[cfg_attr(test, assert_instr(vcvtpd2qq))]
3159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3160pub fn _mm256_cvtpd_epi64(a: __m256d) -> __m256i {
    _mm256_mask_cvtpd_epi64(_mm256_undefined_si256(), 0xff, a)
3162}
3163
3164/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3165/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3166/// not set).
3167///
3168/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi64&ig_expand=1945)
3169#[inline]
3170#[target_feature(enable = "avx512dq,avx512vl")]
3171#[cfg_attr(test, assert_instr(vcvtpd2qq))]
3172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3173pub fn _mm256_mask_cvtpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
    unsafe { transmute(vcvtpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) }
3175}
3176
3177/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3178/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3179///
3180/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi64&ig_expand=1946)
3181#[inline]
3182#[target_feature(enable = "avx512dq,avx512vl")]
3183#[cfg_attr(test, assert_instr(vcvtpd2qq))]
3184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3185pub fn _mm256_maskz_cvtpd_epi64(k: __mmask8, a: __m256d) -> __m256i {
    _mm256_mask_cvtpd_epi64(_mm256_setzero_si256(), k, a)
3187}
3188
3189/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3190/// and store the results in dst.
3191///
3192/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi64&ig_expand=1947)
3193#[inline]
3194#[target_feature(enable = "avx512dq")]
3195#[cfg_attr(test, assert_instr(vcvtpd2qq))]
3196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3197pub fn _mm512_cvtpd_epi64(a: __m512d) -> __m512i {
    _mm512_mask_cvtpd_epi64(_mm512_undefined_epi32(), 0xff, a)
3199}
3200
3201/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3202/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3203/// not set).
3204///
3205/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi64&ig_expand=1948)
3206#[inline]
3207#[target_feature(enable = "avx512dq")]
3208#[cfg_attr(test, assert_instr(vcvtpd2qq))]
3209#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3210pub fn _mm512_mask_cvtpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
3211 unsafe {
        transmute(vcvtpd2qq_512(
            a.as_f64x8(),
            src.as_i64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
3218 }
3219}
3220
3221/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3222/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3223///
3224/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi64&ig_expand=1949)
3225#[inline]
3226#[target_feature(enable = "avx512dq")]
3227#[cfg_attr(test, assert_instr(vcvtpd2qq))]
3228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3229pub fn _mm512_maskz_cvtpd_epi64(k: __mmask8, a: __m512d) -> __m512i {
    _mm512_mask_cvtpd_epi64(_mm512_setzero_si512(), k, a)
3231}
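
// Illustrative sketch (hypothetical helper, arbitrary values): without an
// explicit ROUNDING constant the conversion uses MXCSR.RC, which defaults to
// round-to-nearest-even, so halfway values go to the even integer.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512dq")]
fn example_cvtpd_epi64_default_rounding() {
    unsafe {
        let a = _mm512_set_pd(3.5, 2.5, 1.5, 0.5, -0.5, -1.5, -2.5, -3.5);
        let r: [i64; 8] = transmute(_mm512_cvtpd_epi64(a));
        assert_eq!(r, [-4, -2, -2, 0, 0, 2, 2, 4]);
    }
}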
3232
3233/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3234/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
3235///
3236/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3237/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3238/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3239/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3240/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3241///
3242/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi64&ig_expand=1514)
3243#[inline]
3244#[target_feature(enable = "avx512dq")]
3245#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
3246#[rustc_legacy_const_generics(1)]
3247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3248pub fn _mm512_cvt_roundps_epi64<const ROUNDING: i32>(a: __m256) -> __m512i {
3249 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
3251}
3252
3253/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3254/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3255/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
3256///
3257/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3258/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3259/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3260/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3261/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3262///
3263/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi64&ig_expand=1515)
3264#[inline]
3265#[target_feature(enable = "avx512dq")]
3266#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
3267#[rustc_legacy_const_generics(3)]
3268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3269pub fn _mm512_mask_cvt_roundps_epi64<const ROUNDING: i32>(
3270 src: __m512i,
3271 k: __mmask8,
3272 a: __m256,
3273) -> __m512i {
3274 unsafe {
3275 static_assert_rounding!(ROUNDING);
        transmute(vcvtps2qq_512(a.as_f32x8(), src.as_i64x8(), k, ROUNDING))
3277 }
3278}
3279
3280/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3281/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3282/// Rounding is done according to the ROUNDING parameter, which can be one of:
3283///
3284/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3285/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3286/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3287/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3288/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3289///
3290/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi64&ig_expand=1516)
3291#[inline]
3292#[target_feature(enable = "avx512dq")]
3293#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
3294#[rustc_legacy_const_generics(2)]
3295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3296pub fn _mm512_maskz_cvt_roundps_epi64<const ROUNDING: i32>(k: __mmask8, a: __m256) -> __m512i {
3297 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
3299}
3300
3301/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3302/// and store the results in dst.
3303///
3304/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi64&ig_expand=2075)
3305#[inline]
3306#[target_feature(enable = "avx512dq,avx512vl")]
3307#[cfg_attr(test, assert_instr(vcvtps2qq))]
3308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3309pub fn _mm_cvtps_epi64(a: __m128) -> __m128i {
    _mm_mask_cvtps_epi64(_mm_undefined_si128(), 0xff, a)
3311}
3312
3313/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3314/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3315/// not set).
3316///
3317/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi64&ig_expand=2076)
3318#[inline]
3319#[target_feature(enable = "avx512dq,avx512vl")]
3320#[cfg_attr(test, assert_instr(vcvtps2qq))]
3321#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3322pub fn _mm_mask_cvtps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvtps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) }
3324}
3325
3326/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3327/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3328///
3329/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi64&ig_expand=2077)
3330#[inline]
3331#[target_feature(enable = "avx512dq,avx512vl")]
3332#[cfg_attr(test, assert_instr(vcvtps2qq))]
3333#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3334pub fn _mm_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m128i {
    _mm_mask_cvtps_epi64(_mm_setzero_si128(), k, a)
3336}
3337
3338/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3339/// and store the results in dst.
3340///
3341/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epi64&ig_expand=2078)
3342#[inline]
3343#[target_feature(enable = "avx512dq,avx512vl")]
3344#[cfg_attr(test, assert_instr(vcvtps2qq))]
3345#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3346pub fn _mm256_cvtps_epi64(a: __m128) -> __m256i {
    _mm256_mask_cvtps_epi64(_mm256_undefined_si256(), 0xff, a)
3348}
3349
3350/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3351/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3352/// not set).
3353///
3354/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi64&ig_expand=2079)
3355#[inline]
3356#[target_feature(enable = "avx512dq,avx512vl")]
3357#[cfg_attr(test, assert_instr(vcvtps2qq))]
3358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3359pub fn _mm256_mask_cvtps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
    unsafe { transmute(vcvtps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) }
3361}
3362
3363/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3364/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3365///
3366/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi64&ig_expand=2080)
3367#[inline]
3368#[target_feature(enable = "avx512dq,avx512vl")]
3369#[cfg_attr(test, assert_instr(vcvtps2qq))]
3370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m256i {
    _mm256_mask_cvtps_epi64(_mm256_setzero_si256(), k, a)
}
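
// NOTE: hedged sketch of the writemask behaviour (illustration only, assumes
// `avx512dq,avx512vl`): lanes whose mask bit is clear come from `src` instead of being zeroed.
//
//     let src = _mm256_set1_epi64x(-1);
//     let a = _mm_set_ps(4.5, 3.5, 2.5, 1.5);          // lanes: 1.5, 2.5, 3.5, 4.5
//     let r = _mm256_mask_cvtps_epi64(src, 0b0101, a); // nearest-even: [2, -1, 4, -1]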
3374
3375/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3376/// and store the results in dst.
3377///
3378/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi64&ig_expand=2081)
3379#[inline]
3380#[target_feature(enable = "avx512dq")]
3381#[cfg_attr(test, assert_instr(vcvtps2qq))]
3382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtps_epi64(a: __m256) -> __m512i {
    _mm512_mask_cvtps_epi64(_mm512_undefined_epi32(), 0xff, a)
}
3386
3387/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3388/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3389/// not set).
3390///
3391/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi64&ig_expand=2082)
3392#[inline]
3393#[target_feature(enable = "avx512dq")]
3394#[cfg_attr(test, assert_instr(vcvtps2qq))]
3395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    unsafe {
        transmute(vcvtps2qq_512(
            a.as_f32x8(),
            src.as_i64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
3406
3407/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3408/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3409///
3410/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi64&ig_expand=2083)
3411#[inline]
3412#[target_feature(enable = "avx512dq")]
3413#[cfg_attr(test, assert_instr(vcvtps2qq))]
3414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtps_epi64(k: __mmask8, a: __m256) -> __m512i {
    _mm512_mask_cvtps_epi64(_mm512_setzero_si512(), k, a)
}
3418
3419/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3420/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
3421///
3422/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3423/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3424/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3425/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3426/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3427///
3428/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu64&ig_expand=1478)
3429#[inline]
3430#[target_feature(enable = "avx512dq")]
3431#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
3432#[rustc_legacy_const_generics(1)]
3433#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundpd_epu64<const ROUNDING: i32>(a: __m512d) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
}
3438
3439/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3440/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3441/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
3442///
3443/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3444/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3445/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3446/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3447/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3448///
3449/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu64&ig_expand=1479)
3450#[inline]
3451#[target_feature(enable = "avx512dq")]
3452#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
3453#[rustc_legacy_const_generics(3)]
3454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundpd_epu64<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512d,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, ROUNDING))
    }
}
3465
3466/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3467/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3468/// Rounding is done according to the ROUNDING parameter, which can be one of:
3469///
3470/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3471/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3472/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3473/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3474/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3475///
3476/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu64&ig_expand=1480)
3477#[inline]
3478#[target_feature(enable = "avx512dq")]
3479#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
3480#[rustc_legacy_const_generics(2)]
3481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundpd_epu64<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a)
}
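
// NOTE: hedged sketch of how the ROUNDING parameter changes the result (illustration only,
// assumes `avx512dq`):
//
//     let a = _mm512_set1_pd(2.5);
//     let dn = _mm512_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a); // all lanes 2
//     let up = _mm512_cvt_roundpd_epu64::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a); // all lanes 3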
3486
3487/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3488/// and store the results in dst.
3489///
3490/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu64&ig_expand=1959)
3491#[inline]
3492#[target_feature(enable = "avx512dq,avx512vl")]
3493#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtpd_epu64(a: __m128d) -> __m128i {
    _mm_mask_cvtpd_epu64(_mm_undefined_si128(), 0xff, a)
}
3498
3499/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3500/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3501/// not set).
3502///
3503/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu64&ig_expand=1960)
3504#[inline]
3505#[target_feature(enable = "avx512dq,avx512vl")]
3506#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3507#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvtpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) }
}
3511
3512/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3513/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3514///
3515/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu64&ig_expand=1961)
3516#[inline]
3517#[target_feature(enable = "avx512dq,avx512vl")]
3518#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtpd_epu64(k: __mmask8, a: __m128d) -> __m128i {
    _mm_mask_cvtpd_epu64(_mm_setzero_si128(), k, a)
}
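
// NOTE: hedged sketch (illustration only, assumes `avx512dq,avx512vl`); with the default
// MXCSR mode both lanes round to nearest-even:
//
//     let a = _mm_set_pd(7.5, 0.25);          // lanes: 0.25, 7.5
//     let r = _mm_maskz_cvtpd_epu64(0b11, a); // [0, 8]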
3523
3524/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3525/// and store the results in dst.
3526///
3527/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu64&ig_expand=1962)
3528#[inline]
3529#[target_feature(enable = "avx512dq,avx512vl")]
3530#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtpd_epu64(a: __m256d) -> __m256i {
    _mm256_mask_cvtpd_epu64(_mm256_undefined_si256(), 0xff, a)
}
3535
3536/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3537/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3538/// not set).
3539///
3540/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu64&ig_expand=1963)
3541#[inline]
3542#[target_feature(enable = "avx512dq,avx512vl")]
3543#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
    unsafe { transmute(vcvtpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) }
}
3548
3549/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3550/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3551///
3552/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu64&ig_expand=1964)
3553#[inline]
3554#[target_feature(enable = "avx512dq,avx512vl")]
3555#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtpd_epu64(k: __mmask8, a: __m256d) -> __m256i {
    _mm256_mask_cvtpd_epu64(_mm256_setzero_si256(), k, a)
}
3560
3561/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3562/// and store the results in dst.
3563///
3564/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu64&ig_expand=1965)
3565#[inline]
3566#[target_feature(enable = "avx512dq")]
3567#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtpd_epu64(a: __m512d) -> __m512i {
    _mm512_mask_cvtpd_epu64(_mm512_undefined_epi32(), 0xff, a)
}
3572
3573/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3574/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3575/// not set).
3576///
3577/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu64&ig_expand=1966)
3578#[inline]
3579#[target_feature(enable = "avx512dq")]
3580#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
    unsafe {
        transmute(vcvtpd2uqq_512(
            a.as_f64x8(),
            src.as_u64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
3592
3593/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3594/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3595///
3596/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu64&ig_expand=1967)
3597#[inline]
3598#[target_feature(enable = "avx512dq")]
3599#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3600#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtpd_epu64(k: __mmask8, a: __m512d) -> __m512i {
    _mm512_mask_cvtpd_epu64(_mm512_setzero_si512(), k, a)
}
3604
3605/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3606/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
3607///
3608/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3609/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3610/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3611/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3612/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3613///
3614/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu64&ig_expand=1520)
3615#[inline]
3616#[target_feature(enable = "avx512dq")]
3617#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
3618#[rustc_legacy_const_generics(1)]
3619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundps_epu64<const ROUNDING: i32>(a: __m256) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
}
3624
3625/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3626/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3627/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
3628///
3629/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3630/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3631/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3632/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3633/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3634///
3635/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu64&ig_expand=1521)
3636#[inline]
3637#[target_feature(enable = "avx512dq")]
3638#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
3639#[rustc_legacy_const_generics(3)]
3640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundps_epu64<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m256,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, ROUNDING))
    }
}
3651
3652/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3653/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3654/// Rounding is done according to the ROUNDING parameter, which can be one of:
3655///
3656/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3657/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3658/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3659/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3660/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3661///
3662/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu64&ig_expand=1522)
3663#[inline]
3664#[target_feature(enable = "avx512dq")]
3665#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
3666#[rustc_legacy_const_generics(2)]
3667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundps_epu64<const ROUNDING: i32>(k: __mmask8, a: __m256) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a)
}
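
// NOTE: hedged sketch (illustration only, assumes `avx512dq`); _MM_FROUND_TO_ZERO makes
// the conversion truncate:
//
//     let a = _mm256_set1_ps(1.75);
//     let r = _mm512_cvt_roundps_epu64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); // all lanes 1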
3672
3673/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3674/// and store the results in dst.
3675///
3676/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu64&ig_expand=2093)
3677#[inline]
3678#[target_feature(enable = "avx512dq,avx512vl")]
3679#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3680#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtps_epu64(a: __m128) -> __m128i {
    _mm_mask_cvtps_epu64(_mm_undefined_si128(), 0xff, a)
}
3684
3685/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3686/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3687/// not set).
3688///
3689/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu64&ig_expand=2094)
3690#[inline]
3691#[target_feature(enable = "avx512dq,avx512vl")]
3692#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3693#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvtps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) }
}
3697
3698/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3699/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3700///
3701/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu64&ig_expand=2095)
3702#[inline]
3703#[target_feature(enable = "avx512dq,avx512vl")]
3704#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3705#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m128i {
    _mm_mask_cvtps_epu64(_mm_setzero_si128(), k, a)
}
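
// NOTE: hedged sketch (illustration only, assumes `avx512dq,avx512vl`); only the two low
// f32 lanes are consumed:
//
//     let a = _mm_set_ps(0.0, 0.0, 3.5, 0.5); // lanes: 0.5, 3.5, 0.0, 0.0
//     let r = _mm_cvtps_epu64(a);             // nearest-even: [0, 4]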
3709
3710/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3711/// and store the results in dst.
3712///
3713/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu64&ig_expand=2096)
3714#[inline]
3715#[target_feature(enable = "avx512dq,avx512vl")]
3716#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3717#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtps_epu64(a: __m128) -> __m256i {
    _mm256_mask_cvtps_epu64(_mm256_undefined_si256(), 0xff, a)
}
3721
3722/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3723/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3724/// not set).
3725///
3726/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu64&ig_expand=2097)
3727#[inline]
3728#[target_feature(enable = "avx512dq,avx512vl")]
3729#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
    unsafe { transmute(vcvtps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) }
}
3734
3735/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3736/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3737///
3738/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu64&ig_expand=2098)
3739#[inline]
3740#[target_feature(enable = "avx512dq,avx512vl")]
3741#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m256i {
    _mm256_mask_cvtps_epu64(_mm256_setzero_si256(), k, a)
}
3746
3747/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3748/// and store the results in dst.
3749///
3750/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu64&ig_expand=2099)
3751#[inline]
3752#[target_feature(enable = "avx512dq")]
3753#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtps_epu64(a: __m256) -> __m512i {
    _mm512_mask_cvtps_epu64(_mm512_undefined_epi32(), 0xff, a)
}
3758
3759/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3760/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3761/// not set).
3762///
3763/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu64&ig_expand=2100)
3764#[inline]
3765#[target_feature(enable = "avx512dq")]
3766#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3767#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    unsafe {
        transmute(vcvtps2uqq_512(
            a.as_f32x8(),
            src.as_u64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
3778
3779/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3780/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3781///
3782/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu64&ig_expand=2101)
3783#[inline]
3784#[target_feature(enable = "avx512dq")]
3785#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtps_epu64(k: __mmask8, a: __m256) -> __m512i {
    _mm512_mask_cvtps_epu64(_mm512_setzero_si512(), k, a)
}
3790
3791/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3792/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
3793/// to the sae parameter.
3794///
3795/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi64&ig_expand=2264)
3796#[inline]
3797#[target_feature(enable = "avx512dq")]
3798#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
3799#[rustc_legacy_const_generics(1)]
3800#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtt_roundpd_epi64<const SAE: i32>(a: __m512d) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundpd_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}
3805
3806/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3807/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3808/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
3809///
3810/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi64&ig_expand=2265)
3811#[inline]
3812#[target_feature(enable = "avx512dq")]
3813#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
3814#[rustc_legacy_const_generics(3)]
3815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtt_roundpd_epi64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512d,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, SAE))
    }
}
3826
3827/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3828/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3829/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
3830///
3831/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi64&ig_expand=2266)
3832#[inline]
3833#[target_feature(enable = "avx512dq")]
3834#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
3835#[rustc_legacy_const_generics(2)]
3836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtt_roundpd_epi64<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundpd_epi64::<SAE>(_mm512_setzero_si512(), k, a)
}
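
// NOTE: hedged sketch (illustration only, assumes `avx512dq`); truncation always rounds
// toward zero, and _MM_FROUND_NO_EXC only suppresses exception flags:
//
//     let a = _mm512_set1_pd(-2.9);
//     let r = _mm512_cvtt_roundpd_epi64::<{ _MM_FROUND_NO_EXC }>(a); // all lanes -2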
3841
3842/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3843/// with truncation, and store the result in dst.
3844///
3845/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi64&ig_expand=2329)
3846#[inline]
3847#[target_feature(enable = "avx512dq,avx512vl")]
3848#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvttpd_epi64(a: __m128d) -> __m128i {
    _mm_mask_cvttpd_epi64(_mm_undefined_si128(), 0xff, a)
}
3853
3854/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3855/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3856/// corresponding bit is not set).
3857///
3858/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi64&ig_expand=2330)
3859#[inline]
3860#[target_feature(enable = "avx512dq,avx512vl")]
3861#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvttpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) }
}
3866
3867/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3868/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3869/// bit is not set).
3870///
3871/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi64&ig_expand=2331)
3872#[inline]
3873#[target_feature(enable = "avx512dq,avx512vl")]
3874#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3875#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvttpd_epi64(k: __mmask8, a: __m128d) -> __m128i {
    _mm_mask_cvttpd_epi64(_mm_setzero_si128(), k, a)
}
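
// NOTE: hedged sketch (illustration only, assumes `avx512dq,avx512vl`); values are
// truncated toward zero:
//
//     let a = _mm_set_pd(-1.5, 2.9); // lanes: 2.9, -1.5
//     let r = _mm_cvttpd_epi64(a);   // [2, -1]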
3879
3880/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3881/// with truncation, and store the result in dst.
3882///
3883/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epi64&ig_expand=2332)
3884#[inline]
3885#[target_feature(enable = "avx512dq,avx512vl")]
3886#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvttpd_epi64(a: __m256d) -> __m256i {
    _mm256_mask_cvttpd_epi64(_mm256_undefined_si256(), 0xff, a)
}
3891
3892/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3893/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3894/// corresponding bit is not set).
3895///
3896/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi64&ig_expand=2333)
3897#[inline]
3898#[target_feature(enable = "avx512dq,avx512vl")]
3899#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvttpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
    unsafe { transmute(vcvttpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) }
}
3904
3905/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3906/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3907/// bit is not set).
3908///
3909/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi64&ig_expand=2334)
3910#[inline]
3911#[target_feature(enable = "avx512dq,avx512vl")]
3912#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvttpd_epi64(k: __mmask8, a: __m256d) -> __m256i {
    _mm256_mask_cvttpd_epi64(_mm256_setzero_si256(), k, a)
}
3917
3918/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3919/// with truncation, and store the result in dst.
3920///
3921/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi64&ig_expand=2335)
3922#[inline]
3923#[target_feature(enable = "avx512dq")]
3924#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3925#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvttpd_epi64(a: __m512d) -> __m512i {
    _mm512_mask_cvttpd_epi64(_mm512_undefined_epi32(), 0xff, a)
}
3929
3930/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3931/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3932/// corresponding bit is not set).
3933///
3934/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi64&ig_expand=2336)
3935#[inline]
3936#[target_feature(enable = "avx512dq")]
3937#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvttpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
    unsafe {
        transmute(vcvttpd2qq_512(
            a.as_f64x8(),
            src.as_i64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
3949
3950/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3951/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3952/// bit is not set).
3953///
3954/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi64&ig_expand=2337)
3955#[inline]
3956#[target_feature(enable = "avx512dq")]
3957#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvttpd_epi64(k: __mmask8, a: __m512d) -> __m512i {
    _mm512_mask_cvttpd_epi64(_mm512_setzero_si512(), k, a)
}
3962
3963/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
3964/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
3965/// to the sae parameter.
3966///
3967/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi64&ig_expand=2294)
3968#[inline]
3969#[target_feature(enable = "avx512dq")]
3970#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
3971#[rustc_legacy_const_generics(1)]
3972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtt_roundps_epi64<const SAE: i32>(a: __m256) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundps_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}
3977
3978/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
3979/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3980/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
3981///
3982/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi64&ig_expand=2295)
3983#[inline]
3984#[target_feature(enable = "avx512dq")]
3985#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
3986#[rustc_legacy_const_generics(3)]
3987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtt_roundps_epi64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m256,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttps2qq_512(a.as_f32x8(), src.as_i64x8(), k, SAE))
    }
}
3998
3999/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4000/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4001/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
4002///
4003/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi64&ig_expand=2296)
4004#[inline]
4005#[target_feature(enable = "avx512dq")]
4006#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
4007#[rustc_legacy_const_generics(2)]
4008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtt_roundps_epi64<const SAE: i32>(k: __mmask8, a: __m256) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundps_epi64::<SAE>(_mm512_setzero_si512(), k, a)
}
4013
4014/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4015/// with truncation, and store the result in dst.
4016///
4017/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi64&ig_expand=2420)
4018#[inline]
4019#[target_feature(enable = "avx512dq,avx512vl")]
4020#[cfg_attr(test, assert_instr(vcvttps2qq))]
4021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvttps_epi64(a: __m128) -> __m128i {
    _mm_mask_cvttps_epi64(_mm_undefined_si128(), 0xff, a)
}
4025
4026/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4027/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4028/// corresponding bit is not set).
4029///
4030/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi64&ig_expand=2421)
4031#[inline]
4032#[target_feature(enable = "avx512dq,avx512vl")]
4033#[cfg_attr(test, assert_instr(vcvttps2qq))]
4034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvttps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) }
}
4038
4039/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4040/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4041/// bit is not set).
4042///
4043/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi64&ig_expand=2422)
4044#[inline]
4045#[target_feature(enable = "avx512dq,avx512vl")]
4046#[cfg_attr(test, assert_instr(vcvttps2qq))]
4047#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m128i {
    _mm_mask_cvttps_epi64(_mm_setzero_si128(), k, a)
}
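
// NOTE: hedged sketch (illustration only, assumes `avx512dq,avx512vl`):
//
//     let a = _mm_set_ps(0.0, 0.0, -7.9, 5.5); // lanes: 5.5, -7.9, 0.0, 0.0
//     let r = _mm_cvttps_epi64(a);             // truncated: [5, -7]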
4051
4052/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4053/// with truncation, and store the result in dst.
4054///
4055/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epi64&ig_expand=2423)
4056#[inline]
4057#[target_feature(enable = "avx512dq,avx512vl")]
4058#[cfg_attr(test, assert_instr(vcvttps2qq))]
4059#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvttps_epi64(a: __m128) -> __m256i {
    _mm256_mask_cvttps_epi64(_mm256_undefined_si256(), 0xff, a)
}
4063
4064/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4065/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4066/// corresponding bit is not set).
4067///
4068/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi64&ig_expand=2424)
4069#[inline]
4070#[target_feature(enable = "avx512dq,avx512vl")]
4071#[cfg_attr(test, assert_instr(vcvttps2qq))]
4072#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvttps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
    unsafe { transmute(vcvttps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) }
}
4076
4077/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4078/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4079/// bit is not set).
4080///
4081/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi64&ig_expand=2425)
4082#[inline]
4083#[target_feature(enable = "avx512dq,avx512vl")]
4084#[cfg_attr(test, assert_instr(vcvttps2qq))]
4085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m256i {
    _mm256_mask_cvttps_epi64(_mm256_setzero_si256(), k, a)
}
4089
4090/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4091/// with truncation, and store the result in dst.
4092///
4093/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi64&ig_expand=2426)
4094#[inline]
4095#[target_feature(enable = "avx512dq")]
4096#[cfg_attr(test, assert_instr(vcvttps2qq))]
4097#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvttps_epi64(a: __m256) -> __m512i {
    _mm512_mask_cvttps_epi64(_mm512_undefined_epi32(), 0xff, a)
}
4101
4102/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4103/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4104/// corresponding bit is not set).
4105///
4106/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi64&ig_expand=2427)
4107#[inline]
4108#[target_feature(enable = "avx512dq")]
4109#[cfg_attr(test, assert_instr(vcvttps2qq))]
4110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvttps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    unsafe {
        transmute(vcvttps2qq_512(
            a.as_f32x8(),
            src.as_i64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
4121
4122/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4123/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4124/// bit is not set).
4125///
4126/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi64&ig_expand=2428)
4127#[inline]
4128#[target_feature(enable = "avx512dq")]
4129#[cfg_attr(test, assert_instr(vcvttps2qq))]
4130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvttps_epi64(k: __mmask8, a: __m256) -> __m512i {
    _mm512_mask_cvttps_epi64(_mm512_setzero_si512(), k, a)
}
4134
4135/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4136/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
4137/// to the sae parameter.
4138///
4139/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu64&ig_expand=1965)
4140#[inline]
4141#[target_feature(enable = "avx512dq")]
4142#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
4143#[rustc_legacy_const_generics(1)]
4144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtt_roundpd_epu64<const SAE: i32>(a: __m512d) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundpd_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}
4149
4150/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4151/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4152/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
4153///
4154/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu64&ig_expand=1966)
4155#[inline]
4156#[target_feature(enable = "avx512dq")]
4157#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
4158#[rustc_legacy_const_generics(3)]
4159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtt_roundpd_epu64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512d,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, SAE))
    }
}
4170
4171/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4172/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4173/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
4174///
4175/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu64&ig_expand=1967)
4176#[inline]
4177#[target_feature(enable = "avx512dq")]
4178#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
4179#[rustc_legacy_const_generics(2)]
4180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtt_roundpd_epu64<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundpd_epu64::<SAE>(_mm512_setzero_si512(), k, a)
}
4185
4186/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4187/// with truncation, and store the result in dst.
4188///
4189/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu64&ig_expand=2347)
4190#[inline]
4191#[target_feature(enable = "avx512dq,avx512vl")]
4192#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4193#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvttpd_epu64(a: __m128d) -> __m128i {
    _mm_mask_cvttpd_epu64(_mm_undefined_si128(), 0xff, a)
}
4197
4198/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4199/// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding
4200/// bit is not set).
4201///
4202/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu64&ig_expand=2348)
4203#[inline]
4204#[target_feature(enable = "avx512dq,avx512vl")]
4205#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvttpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) }
}
4210
4211/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4212/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4213/// bit is not set).
4214///
4215/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu64&ig_expand=2349)
4216#[inline]
4217#[target_feature(enable = "avx512dq,avx512vl")]
4218#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvttpd_epu64(k: __mmask8, a: __m128d) -> __m128i {
    _mm_mask_cvttpd_epu64(_mm_setzero_si128(), k, a)
}
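
// NOTE: hedged sketch (illustration only, assumes `avx512dq,avx512vl`); negative inputs
// are out of range for the unsigned destination:
//
//     let a = _mm_set_pd(3.99, 0.75); // lanes: 0.75, 3.99
//     let r = _mm_cvttpd_epu64(a);    // truncated: [0, 3]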
4223
4224/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4225/// with truncation, and store the result in dst.
4226///
4227/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu64&ig_expand=2350)
4228#[inline]
4229#[target_feature(enable = "avx512dq,avx512vl")]
4230#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvttpd_epu64(a: __m256d) -> __m256i {
    _mm256_mask_cvttpd_epu64(_mm256_undefined_si256(), 0xff, a)
}
4235
4236/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4237/// with truncation, and store the results in dst using writemask k (elements are copied from src if the corresponding
4238/// bit is not set).
4239///
4240/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu64&ig_expand=2351)
4241#[inline]
4242#[target_feature(enable = "avx512dq,avx512vl")]
4243#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvttpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
    unsafe { transmute(vcvttpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) }
}
4248
4249/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4250/// with truncation, and store the results in dst using zeromask k (elements are zeroed out if the corresponding
4251/// bit is not set).
4252///
4253/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu64&ig_expand=2352)
4254#[inline]
4255#[target_feature(enable = "avx512dq,avx512vl")]
4256#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvttpd_epu64(k: __mmask8, a: __m256d) -> __m256i {
    _mm256_mask_cvttpd_epu64(_mm256_setzero_si256(), k, a)
}
4261
4262/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4263/// with truncation, and store the result in dst.
4264///
4265/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu64&ig_expand=2353)
4266#[inline]
4267#[target_feature(enable = "avx512dq")]
4268#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvttpd_epu64(a: __m512d) -> __m512i {
    _mm512_mask_cvttpd_epu64(_mm512_undefined_epi32(), 0xff, a)
}
4273
4274/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4275/// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding
4276/// bit is not set).
4277///
4278/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu64&ig_expand=2354)
4279#[inline]
4280#[target_feature(enable = "avx512dq")]
4281#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvttpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
    unsafe {
        transmute(vcvttpd2uqq_512(
            a.as_f64x8(),
            src.as_u64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
4293
4294/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4295/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
4298/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu64&ig_expand=2355)
4299#[inline]
4300#[target_feature(enable = "avx512dq")]
4301#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4302#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvttpd_epu64(k: __mmask8, a: __m512d) -> __m512i {
    _mm512_mask_cvttpd_epu64(_mm512_setzero_si512(), k, a)
}
4306
4307/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4308/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
4309/// to the sae parameter.
4310///
4311/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu64&ig_expand=2300)
4312#[inline]
4313#[target_feature(enable = "avx512dq")]
4314#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
4315#[rustc_legacy_const_generics(1)]
4316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtt_roundps_epu64<const SAE: i32>(a: __m256) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundps_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}
4321
4322/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4323/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4324/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
4325///
4326/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu64&ig_expand=2301)
4327#[inline]
4328#[target_feature(enable = "avx512dq")]
4329#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
4330#[rustc_legacy_const_generics(3)]
4331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtt_roundps_epu64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m256,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, SAE))
    }
}
4342
4343/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4344/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4345/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
4346///
4347/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu64&ig_expand=2302)
4348#[inline]
4349#[target_feature(enable = "avx512dq")]
4350#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
4351#[rustc_legacy_const_generics(2)]
4352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtt_roundps_epu64<const SAE: i32>(k: __mmask8, a: __m256) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundps_epu64::<SAE>(_mm512_setzero_si512(), k, a)
}
4357
4358/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4359/// with truncation, and store the result in dst.
4360///
4361/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu64&ig_expand=2438)
4362#[inline]
4363#[target_feature(enable = "avx512dq,avx512vl")]
4364#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvttps_epu64(a: __m128) -> __m128i {
    _mm_mask_cvttps_epu64(_mm_undefined_si128(), 0xff, a)
}
4369
4370/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4371/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4372/// corresponding bit is not set).
4373///
4374/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu64&ig_expand=2439)
4375#[inline]
4376#[target_feature(enable = "avx512dq,avx512vl")]
4377#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4378#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvttps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) }
}
4382
4383/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4384/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4385/// bit is not set).
4386///
4387/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu64&ig_expand=2440)
4388#[inline]
4389#[target_feature(enable = "avx512dq,avx512vl")]
4390#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4391#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m128i {
    _mm_mask_cvttps_epu64(_mm_setzero_si128(), k, a)
}
4395
4396/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4397/// with truncation, and store the result in dst.
4398///
4399/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu64&ig_expand=2441)
4400#[inline]
4401#[target_feature(enable = "avx512dq,avx512vl")]
4402#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4403#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvttps_epu64(a: __m128) -> __m256i {
    _mm256_mask_cvttps_epu64(_mm256_undefined_si256(), 0xff, a)
}
4407
4408/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4409/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4410/// corresponding bit is not set).
4411///
4412/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu64&ig_expand=2442)
4413#[inline]
4414#[target_feature(enable = "avx512dq,avx512vl")]
4415#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4417pub fn _mm256_mask_cvttps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
    unsafe { transmute(vcvttps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) }
4419}
4420
4421/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4422/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4423/// bit is not set).
4424///
4425/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu64&ig_expand=2443)
4426#[inline]
4427#[target_feature(enable = "avx512dq,avx512vl")]
4428#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4430pub fn _mm256_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m256i {
    _mm256_mask_cvttps_epu64(_mm256_setzero_si256(), k, a)
4432}
4433
4434/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4435/// with truncation, and store the result in dst.
4436///
4437/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu64&ig_expand=2444)
4438#[inline]
4439#[target_feature(enable = "avx512dq")]
4440#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4442pub fn _mm512_cvttps_epu64(a: __m256) -> __m512i {
    _mm512_mask_cvttps_epu64(_mm512_undefined_epi32(), 0xff, a)
4444}
4445
4446/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4447/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4448/// corresponding bit is not set).
4449///
4450/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu64&ig_expand=2445)
4451#[inline]
4452#[target_feature(enable = "avx512dq")]
4453#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4455pub fn _mm512_mask_cvttps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    unsafe {
        transmute(vcvttps2uqq_512(
            a.as_f32x8(),
            src.as_u64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
4464}
4465
4466/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4467/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4468/// bit is not set).
4469///
4470/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu64&ig_expand=2446)
4471#[inline]
4472#[target_feature(enable = "avx512dq")]
4473#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4475pub fn _mm512_maskz_cvttps_epu64(k: __mmask8, a: __m256) -> __m512i {
    _mm512_mask_cvttps_epu64(_mm512_setzero_si512(), k, a)
4477}
4478
4479// Multiply-Low
4480
4481/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4482/// the low 64 bits of the intermediate integers in `dst`.
4483///
4484/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi64&ig_expand=4778)
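///
/// # Examples
///
/// A minimal usage sketch, assuming `avx512dq` and `avx512vl` support is verified at
/// runtime before the call:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") {
///         // SAFETY: `avx512dq` and `avx512vl` were detected at runtime.
///         let lanes: [i64; 2] = unsafe {
///             let a = _mm_set_epi64x(3, -4);
///             let b = _mm_set_epi64x(5, 6);
///             core::mem::transmute(_mm_mullo_epi64(a, b))
///         };
///         assert_eq!(lanes, [-24, 15]); // low 64 bits of each product
///     }
/// }
/// ```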
4485#[inline]
4486#[target_feature(enable = "avx512dq,avx512vl")]
4487#[cfg_attr(test, assert_instr(vpmullq))]
4488#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4489#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4490pub const fn _mm_mullo_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_mul(a.as_i64x2(), b.as_i64x2())) }
4492}
4493
4494/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4495/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from
4496/// `src` if the corresponding bit is not set).
4497///
4498/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi64&ig_expand=4776)
4499#[inline]
4500#[target_feature(enable = "avx512dq,avx512vl")]
4501#[cfg_attr(test, assert_instr(vpmullq))]
4502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4503#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4504pub const fn _mm_mask_mullo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let b = _mm_mullo_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, b, src.as_i64x2()))
    }
4509}
4510
4511/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4512/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if
4513/// the corresponding bit is not set).
4514///
4515/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi64&ig_expand=4777)
4516#[inline]
4517#[target_feature(enable = "avx512dq,avx512vl")]
4518#[cfg_attr(test, assert_instr(vpmullq))]
4519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4520#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4521pub const fn _mm_maskz_mullo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let b = _mm_mullo_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, b, i64x2::ZERO))
    }
4526}
4527
4528/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4529/// the low 64 bits of the intermediate integers in `dst`.
4530///
4531/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi64&ig_expand=4781)
4532#[inline]
4533#[target_feature(enable = "avx512dq,avx512vl")]
4534#[cfg_attr(test, assert_instr(vpmullq))]
4535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4536#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4537pub const fn _mm256_mullo_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_mul(a.as_i64x4(), b.as_i64x4())) }
4539}
4540
4541/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4542/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from
4543/// `src` if the corresponding bit is not set).
4544///
4545/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi64&ig_expand=4779)
4546#[inline]
4547#[target_feature(enable = "avx512dq,avx512vl")]
4548#[cfg_attr(test, assert_instr(vpmullq))]
4549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4550#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4551pub const fn _mm256_mask_mullo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let b = _mm256_mullo_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, b, src.as_i64x4()))
    }
4556}
4557
4558/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4559/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if
4560/// the corresponding bit is not set).
4561///
4562/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi64&ig_expand=4780)
4563#[inline]
4564#[target_feature(enable = "avx512dq,avx512vl")]
4565#[cfg_attr(test, assert_instr(vpmullq))]
4566#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4567#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4568pub const fn _mm256_maskz_mullo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let b = _mm256_mullo_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, b, i64x4::ZERO))
    }
4573}
4574
4575/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4576/// the low 64 bits of the intermediate integers in `dst`.
4577///
4578/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi64&ig_expand=4784)
4579#[inline]
4580#[target_feature(enable = "avx512dq")]
4581#[cfg_attr(test, assert_instr(vpmullq))]
4582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4583#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4584pub const fn _mm512_mullo_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
4586}
4587
4588/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4589/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from
4590/// `src` if the corresponding bit is not set).
4591///
4592/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi64&ig_expand=4782)
4593#[inline]
4594#[target_feature(enable = "avx512dq")]
4595#[cfg_attr(test, assert_instr(vpmullq))]
4596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4597#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4598pub const fn _mm512_mask_mullo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let b = _mm512_mullo_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, b, src.as_i64x8()))
    }
4603}
4604
4605/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4606/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if
4607/// the corresponding bit is not set).
4608///
4609/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi64&ig_expand=4783)
4610#[inline]
4611#[target_feature(enable = "avx512dq")]
4612#[cfg_attr(test, assert_instr(vpmullq))]
4613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4614#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4615pub const fn _mm512_maskz_mullo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let b = _mm512_mullo_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, b, i64x8::ZERO))
    }
4620}
4621
4622// Mask Registers
4623
4624/// Convert 8-bit mask a to a 32-bit integer value and store the result in dst.
4625///
4626/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask8_u32&ig_expand=1891)
4627#[inline]
4628#[target_feature(enable = "avx512dq")]
4629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4630#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4631pub const fn _cvtmask8_u32(a: __mmask8) -> u32 {
4632 a as u32
4633}
4634
4635/// Convert 32-bit integer value a to an 8-bit mask and store the result in dst.
4636///
4637/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask8&ig_expand=2467)
4638#[inline]
4639#[target_feature(enable = "avx512dq")]
4640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4641#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4642pub const fn _cvtu32_mask8(a: u32) -> __mmask8 {
4643 a as __mmask8
4644}
4645
4646/// Add 16-bit masks a and b, and store the result in dst.
4647///
4648/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask16&ig_expand=3903)
4649#[inline]
4650#[target_feature(enable = "avx512dq")]
4651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4652#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4653pub const fn _kadd_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
4654 a.wrapping_add(b)
4655}
4656
4657/// Add 8-bit masks a and b, and store the result in dst.
4658///
4659/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask8&ig_expand=3906)
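///
/// # Examples
///
/// A minimal usage sketch, assuming `avx512dq` support is verified at runtime before
/// the call; note that the addition wraps around on overflow:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512dq") {
///         // SAFETY: `avx512dq` was detected at runtime.
///         let k = unsafe { _kadd_mask8(0b1111_1111, 0b0000_0011) };
///         assert_eq!(k, 0b0000_0010); // 0xFF + 0x03 wraps to 0x02
///     }
/// }
/// ```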
4660#[inline]
4661#[target_feature(enable = "avx512dq")]
4662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4663#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4664pub const fn _kadd_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4665 a.wrapping_add(b)
4666}
4667
4668/// Bitwise AND of 8-bit masks a and b, and store the result in dst.
4669///
4670/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask8&ig_expand=3911)
4671#[inline]
4672#[target_feature(enable = "avx512dq")]
4673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4674#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4675pub const fn _kand_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4676 a & b
4677}
4678
4679/// Bitwise AND NOT of 8-bit masks a and b, and store the result in dst.
4680///
4681/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask8&ig_expand=3916)
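///
/// # Examples
///
/// A minimal usage sketch, assuming `avx512dq` support is verified at runtime before
/// the call; the first operand is the one that gets negated:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512dq") {
///         // SAFETY: `avx512dq` was detected at runtime.
///         let k = unsafe { _kandn_mask8(0b1100_1100, 0b1010_1010) };
///         assert_eq!(k, 0b0010_0010); // (NOT a) AND b
///     }
/// }
/// ```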
4682#[inline]
4683#[target_feature(enable = "avx512dq")]
4684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4685#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4686pub const fn _kandn_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4687 _knot_mask8(a) & b
4688}
4689
4690/// Bitwise NOT of 8-bit mask a, and store the result in dst.
4691///
4692/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask8&ig_expand=3922)
4693#[inline]
4694#[target_feature(enable = "avx512dq")]
4695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4696#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4697pub const fn _knot_mask8(a: __mmask8) -> __mmask8 {
4698 a ^ 0b11111111
4699}
4700
4701/// Bitwise OR of 8-bit masks a and b, and store the result in dst.
4702///
4703/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask8&ig_expand=3927)
4704#[inline]
4705#[target_feature(enable = "avx512dq")]
4706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4707#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4708pub const fn _kor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4709 a | b
4710}
4711
4712/// Bitwise XNOR of 8-bit masks a and b, and store the result in dst.
4713///
4714/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask8&ig_expand=3969)
4715#[inline]
4716#[target_feature(enable = "avx512dq")]
4717#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4718#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4719pub const fn _kxnor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4720 _knot_mask8(_kxor_mask8(a, b))
4721}
4722
4723/// Bitwise XOR of 8-bit masks a and b, and store the result in dst.
4724///
4725/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask8&ig_expand=3974)
4726#[inline]
4727#[target_feature(enable = "avx512dq")]
4728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4729#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4730pub const fn _kxor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4731 a ^ b
4732}
4733
4734/// Compute the bitwise OR of 8-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
4735/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
4736///
4737/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask8_u8&ig_expand=3931)
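///
/// # Examples
///
/// A minimal usage sketch, assuming `avx512dq` support is verified at runtime before
/// the call; the all-ones flag is returned through the `all_ones` out-pointer:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512dq") {
///         let mut all_ones = 0u8;
///         // SAFETY: `avx512dq` was detected and `all_ones` is a valid, writable u8.
///         let all_zeros = unsafe { _kortest_mask8_u8(0b1111_0000, 0b0000_1111, &mut all_ones) };
///         assert_eq!(all_zeros, 0); // the OR is not all zeros
///         assert_eq!(all_ones, 1); // the OR is all ones
///     }
/// }
/// ```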
4738#[inline]
4739#[target_feature(enable = "avx512dq")]
4740#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4741#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4742pub const unsafe fn _kortest_mask8_u8(a: __mmask8, b: __mmask8, all_ones: *mut u8) -> u8 {
4743 let tmp: u8 = _kor_mask8(a, b);
4744 *all_ones = (tmp == 0xff) as u8;
4745 (tmp == 0) as u8
4746}
4747
4748/// Compute the bitwise OR of 8-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
4749/// store 0 in dst.
4750///
4751/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask8_u8&ig_expand=3936)
4752#[inline]
4753#[target_feature(enable = "avx512dq")]
4754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4755#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4756pub const fn _kortestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
4757 (_kor_mask8(a, b) == 0xff) as u8
4758}
4759
4760/// Compute the bitwise OR of 8-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
4761/// store 0 in dst.
4762///
4763/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask8_u8&ig_expand=3941)
4764#[inline]
4765#[target_feature(enable = "avx512dq")]
4766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4767#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4768pub const fn _kortestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
4769 (_kor_mask8(a, b) == 0) as u8
4770}
4771
4772/// Shift 8-bit mask a left by count bits while shifting in zeros, and store the result in dst.
4773///
4774/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask8&ig_expand=3945)
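///
/// # Examples
///
/// A minimal usage sketch, assuming `avx512dq` support is verified at runtime before
/// the call; counts of 8 or more shift every bit out:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512dq") {
///         // SAFETY: `avx512dq` was detected at runtime.
///         let k = unsafe { _kshiftli_mask8::<2>(0b0000_1011) };
///         assert_eq!(k, 0b0010_1100);
///         assert_eq!(unsafe { _kshiftli_mask8::<8>(0b0000_1011) }, 0);
///     }
/// }
/// ```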
4775#[inline]
4776#[target_feature(enable = "avx512dq")]
4777#[rustc_legacy_const_generics(1)]
4778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4779#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4780pub const fn _kshiftli_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 {
4781 a.unbounded_shl(COUNT)
4782}
4783
4784/// Shift 8-bit mask a right by count bits while shifting in zeros, and store the result in dst.
4785///
4786/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask8&ig_expand=3949)
4787#[inline]
4788#[target_feature(enable = "avx512dq")]
4789#[rustc_legacy_const_generics(1)]
4790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4791#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4792pub const fn _kshiftri_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 {
4793 a.unbounded_shr(COUNT)
4794}
4795
4796/// Compute the bitwise AND of 16-bit masks a and b, and if the result is all zeros, store 1 in dst,
4797/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
4798/// zeros, store 1 in and_not, otherwise store 0 in and_not.
4799///
4800/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask16_u8&ig_expand=3950)
4801#[inline]
4802#[target_feature(enable = "avx512dq")]
4803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4804#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4805pub const unsafe fn _ktest_mask16_u8(a: __mmask16, b: __mmask16, and_not: *mut u8) -> u8 {
4806 *and_not = (_kandn_mask16(a, b) == 0) as u8;
4807 (_kand_mask16(a, b) == 0) as u8
4808}
4809
4810/// Compute the bitwise AND of 8-bit masks a and b, and if the result is all zeros, store 1 in dst,
4811/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
4812/// zeros, store 1 in and_not, otherwise store 0 in and_not.
4813///
4814/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask8_u8&ig_expand=3953)
4815#[inline]
4816#[target_feature(enable = "avx512dq")]
4817#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4818#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4819pub const unsafe fn _ktest_mask8_u8(a: __mmask8, b: __mmask8, and_not: *mut u8) -> u8 {
4820 *and_not = (_kandn_mask8(a, b) == 0) as u8;
4821 (_kand_mask8(a, b) == 0) as u8
4822}
4823
4824/// Compute the bitwise NOT of 16-bit mask a and then AND with 16-bit mask b, if the result is all
4825/// zeros, store 1 in dst, otherwise store 0 in dst.
4826///
4827/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask16_u8&ig_expand=3954)
4828#[inline]
4829#[target_feature(enable = "avx512dq")]
4830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4831#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4832pub const fn _ktestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
4833 (_kandn_mask16(a, b) == 0) as u8
4834}
4835
4836/// Compute the bitwise NOT of 8-bit mask a and then AND with 8-bit mask b, if the result is all
4837/// zeros, store 1 in dst, otherwise store 0 in dst.
4838///
4839/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask8_u8&ig_expand=3957)
4840#[inline]
4841#[target_feature(enable = "avx512dq")]
4842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4843#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4844pub const fn _ktestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
4845 (_kandn_mask8(a, b) == 0) as u8
4846}
4847
4848/// Compute the bitwise AND of 16-bit masks a and b, if the result is all zeros, store 1 in dst, otherwise
4849/// store 0 in dst.
4850///
4851/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask16_u8&ig_expand=3958)
4852#[inline]
4853#[target_feature(enable = "avx512dq")]
4854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4855#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4856pub const fn _ktestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
4857 (_kand_mask16(a, b) == 0) as u8
4858}
4859
4860/// Compute the bitwise AND of 8-bit masks a and b, if the result is all zeros, store 1 in dst, otherwise
4861/// store 0 in dst.
4862///
4863/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask8_u8&ig_expand=3961)
4864#[inline]
4865#[target_feature(enable = "avx512dq")]
4866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4867#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4868pub const fn _ktestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
4869 (_kand_mask8(a, b) == 0) as u8
4870}
4871
4872/// Load 8-bit mask from memory
4873///
4874/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask8&ig_expand=3999)
4875#[inline]
4876#[target_feature(enable = "avx512dq")]
4877#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4878#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4879pub const unsafe fn _load_mask8(mem_addr: *const __mmask8) -> __mmask8 {
4880 *mem_addr
4881}
4882
4883/// Store 8-bit mask to memory
4884///
4885/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask8&ig_expand=6468)
4886#[inline]
4887#[target_feature(enable = "avx512dq")]
4888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4889#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4890pub const unsafe fn _store_mask8(mem_addr: *mut __mmask8, a: __mmask8) {
4891 *mem_addr = a;
4892}
4893
4894/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit
4895/// integer in a.
4896///
4897/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi32_mask&ig_expand=4612)
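///
/// # Examples
///
/// A minimal usage sketch, assuming `avx512dq` and `avx512vl` support is verified at
/// runtime before the call:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") {
///         // SAFETY: `avx512dq` and `avx512vl` were detected at runtime.
///         let k = unsafe { _mm_movepi32_mask(_mm_set_epi32(-1, 2, -3, 4)) };
///         assert_eq!(k, 0b1010); // bit i = sign bit of 32-bit element i
///     }
/// }
/// ```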
4898#[inline]
4899#[target_feature(enable = "avx512dq,avx512vl")]
4900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4901#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4902pub const fn _mm_movepi32_mask(a: __m128i) -> __mmask8 {
4903 let zero: __m128i = _mm_setzero_si128();
    _mm_cmplt_epi32_mask(a, zero)
4905}
4906
4907/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit
4908/// integer in a.
4909///
4910/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi32_mask&ig_expand=4613)
4911#[inline]
4912#[target_feature(enable = "avx512dq,avx512vl")]
4913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4914#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4915pub const fn _mm256_movepi32_mask(a: __m256i) -> __mmask8 {
4916 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmplt_epi32_mask(a, zero)
4918}
4919
4920/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit
4921/// integer in a.
4922///
4923/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi32_mask&ig_expand=4614)
4924#[inline]
4925#[target_feature(enable = "avx512dq")]
4926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4927#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4928pub const fn _mm512_movepi32_mask(a: __m512i) -> __mmask16 {
4929 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmplt_epi32_mask(a, zero)
4931}
4932
4933/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit
4934/// integer in a.
4935///
4936/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi64_mask&ig_expand=4615)
4937#[inline]
4938#[target_feature(enable = "avx512dq,avx512vl")]
4939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4940#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4941pub const fn _mm_movepi64_mask(a: __m128i) -> __mmask8 {
4942 let zero: __m128i = _mm_setzero_si128();
    _mm_cmplt_epi64_mask(a, zero)
4944}
4945
4946/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit
4947/// integer in a.
4948///
4949/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi64_mask&ig_expand=4616)
4950#[inline]
4951#[target_feature(enable = "avx512dq,avx512vl")]
4952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4953#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4954pub const fn _mm256_movepi64_mask(a: __m256i) -> __mmask8 {
4955 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmplt_epi64_mask(a, zero)
4957}
4958
4959/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit
4960/// integer in a.
4961///
4962/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi64_mask&ig_expand=4617)
4963#[inline]
4964#[target_feature(enable = "avx512dq")]
4965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4966#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4967pub const fn _mm512_movepi64_mask(a: __m512i) -> __mmask8 {
4968 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmplt_epi64_mask(a, zero)
4970}
4971
4972/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding
4973/// bit in k.
4974///
4975/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi32&ig_expand=4625)
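///
/// # Examples
///
/// A minimal usage sketch, assuming `avx512dq` and `avx512vl` support is verified at
/// runtime before the call:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") {
///         // SAFETY: `avx512dq` and `avx512vl` were detected at runtime.
///         let lanes: [i32; 4] = unsafe { core::mem::transmute(_mm_movm_epi32(0b0101)) };
///         assert_eq!(lanes, [-1, 0, -1, 0]); // all ones where the mask bit is set
///     }
/// }
/// ```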
4976#[inline]
4977#[target_feature(enable = "avx512dq,avx512vl")]
4978#[cfg_attr(test, assert_instr(vpmovm2d))]
4979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4980#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4981pub const fn _mm_movm_epi32(k: __mmask8) -> __m128i {
4982 let ones: __m128i = _mm_set1_epi32(-1);
    _mm_maskz_mov_epi32(k, ones)
4984}
4985
4986/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding
4987/// bit in k.
4988///
4989/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi32&ig_expand=4626)
4990#[inline]
4991#[target_feature(enable = "avx512dq,avx512vl")]
4992#[cfg_attr(test, assert_instr(vpmovm2d))]
4993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4995pub const fn _mm256_movm_epi32(k: __mmask8) -> __m256i {
4996 let ones: __m256i = _mm256_set1_epi32(-1);
    _mm256_maskz_mov_epi32(k, ones)
4998}
4999
5000/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding
5001/// bit in k.
5002///
5003/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi32&ig_expand=4627)
5004#[inline]
5005#[target_feature(enable = "avx512dq")]
5006#[cfg_attr(test, assert_instr(vpmovm2d))]
5007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5009pub const fn _mm512_movm_epi32(k: __mmask16) -> __m512i {
5010 let ones: __m512i = _mm512_set1_epi32(-1);
    _mm512_maskz_mov_epi32(k, ones)
5012}
5013
5014/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding
5015/// bit in k.
5016///
5017/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi64&ig_expand=4628)
5018#[inline]
5019#[target_feature(enable = "avx512dq,avx512vl")]
5020#[cfg_attr(test, assert_instr(vpmovm2q))]
5021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5022#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5023pub const fn _mm_movm_epi64(k: __mmask8) -> __m128i {
5024 let ones: __m128i = _mm_set1_epi64x(-1);
    _mm_maskz_mov_epi64(k, ones)
5026}
5027
5028/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding
5029/// bit in k.
5030///
5031/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi64&ig_expand=4629)
5032#[inline]
5033#[target_feature(enable = "avx512dq,avx512vl")]
5034#[cfg_attr(test, assert_instr(vpmovm2q))]
5035#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5036#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5037pub const fn _mm256_movm_epi64(k: __mmask8) -> __m256i {
5038 let ones: __m256i = _mm256_set1_epi64x(-1);
    _mm256_maskz_mov_epi64(k, ones)
5040}
5041
5042/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding
5043/// bit in k.
5044///
5045/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi64&ig_expand=4630)
5046#[inline]
5047#[target_feature(enable = "avx512dq")]
5048#[cfg_attr(test, assert_instr(vpmovm2q))]
5049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5051pub const fn _mm512_movm_epi64(k: __mmask8) -> __m512i {
5052 let ones: __m512i = _mm512_set1_epi64(-1);
    _mm512_maskz_mov_epi64(k, ones)
5054}
5055
5056// Range
5057
5058/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5059/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
5060/// Lower 2 bits of IMM8 specifies the operation control:
5061/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5062/// Upper 2 bits of IMM8 specifies the sign control:
5063/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5064/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5065///
5066/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_pd&ig_expand=5210)
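///
/// # Examples
///
/// A minimal usage sketch, assuming `avx512dq` support is verified at runtime before
/// the call; `IMM8 = 0b0101` selects max with the sign taken from the compare result,
/// and `_MM_FROUND_NO_EXC` suppresses exceptions:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512dq") {
///         // SAFETY: `avx512dq` was detected at runtime.
///         let lanes: [f64; 8] = unsafe {
///             let a = _mm512_set1_pd(-2.0);
///             let b = _mm512_set1_pd(3.0);
///             core::mem::transmute(_mm512_range_round_pd::<0b0101, { _MM_FROUND_NO_EXC }>(a, b))
///         };
///         assert_eq!(lanes, [3.0; 8]);
///     }
/// }
/// ```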
5067#[inline]
5068#[target_feature(enable = "avx512dq")]
5069#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
5070#[rustc_legacy_const_generics(2, 3)]
5071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5072pub fn _mm512_range_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
5073 static_assert_uimm_bits!(IMM8, 4);
5074 static_assert_sae!(SAE);
    _mm512_mask_range_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), 0xff, a, b)
5076}
5077
5078/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5079/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5080/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5081/// Lower 2 bits of IMM8 specifies the operation control:
5082/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5083/// Upper 2 bits of IMM8 specifies the sign control:
5084/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5085/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5086///
5087/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_round_pd&ig_expand=5208)
5088#[inline]
5089#[target_feature(enable = "avx512dq")]
5090#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
5091#[rustc_legacy_const_generics(4, 5)]
5092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5093pub fn _mm512_mask_range_round_pd<const IMM8: i32, const SAE: i32>(
5094 src: __m512d,
5095 k: __mmask8,
5096 a: __m512d,
5097 b: __m512d,
5098) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        static_assert_sae!(SAE);
        transmute(vrangepd_512(
            a.as_f64x8(),
            b.as_f64x8(),
            IMM8,
            src.as_f64x8(),
            k,
            SAE,
        ))
    }
5111}
5112
5113/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5114/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5115/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5116/// Lower 2 bits of IMM8 specifies the operation control:
5117/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5118/// Upper 2 bits of IMM8 specifies the sign control:
5119/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5120/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5121///
5122/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_round_pd&ig_expand=5209)
5123#[inline]
5124#[target_feature(enable = "avx512dq")]
5125#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
5126#[rustc_legacy_const_generics(3, 4)]
5127#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5128pub fn _mm512_maskz_range_round_pd<const IMM8: i32, const SAE: i32>(
5129 k: __mmask8,
5130 a: __m512d,
5131 b: __m512d,
5132) -> __m512d {
5133 static_assert_uimm_bits!(IMM8, 4);
5134 static_assert_sae!(SAE);
    _mm512_mask_range_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), k, a, b)
5136}
5137
5138/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5139/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
5140/// Lower 2 bits of IMM8 specifies the operation control:
5141/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5142/// Upper 2 bits of IMM8 specifies the sign control:
5143/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5144///
5145/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_pd&ig_expand=5192)
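///
/// # Examples
///
/// A minimal usage sketch, assuming `avx512dq` and `avx512vl` support is verified at
/// runtime before the call; `IMM8 = 0b1011` selects the absolute maximum with the sign
/// bit cleared, i.e. the larger magnitude of each pair as a non-negative value:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") {
///         // SAFETY: `avx512dq` and `avx512vl` were detected at runtime.
///         let lanes: [f64; 2] = unsafe {
///             let a = _mm_set_pd(2.0, -3.0);
///             let b = _mm_set_pd(-4.0, 1.0);
///             core::mem::transmute(_mm_range_pd::<0b1011>(a, b))
///         };
///         assert_eq!(lanes, [3.0, 4.0]);
///     }
/// }
/// ```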
5146#[inline]
5147#[target_feature(enable = "avx512dq,avx512vl")]
5148#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5149#[rustc_legacy_const_generics(2)]
5150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5151pub fn _mm_range_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
5152 static_assert_uimm_bits!(IMM8, 4);
    _mm_mask_range_pd::<IMM8>(_mm_setzero_pd(), 0xff, a, b)
5154}
5155
5156/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5157/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5158/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5159/// Lower 2 bits of IMM8 specifies the operation control:
5160/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5161/// Upper 2 bits of IMM8 specifies the sign control:
5162/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5163///
5164/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_pd&ig_expand=5190)
5165#[inline]
5166#[target_feature(enable = "avx512dq,avx512vl")]
5167#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5168#[rustc_legacy_const_generics(4)]
5169#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5170pub fn _mm_mask_range_pd<const IMM8: i32>(
5171 src: __m128d,
5172 k: __mmask8,
5173 a: __m128d,
5174 b: __m128d,
5175) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangepd_128(
            a.as_f64x2(),
            b.as_f64x2(),
            IMM8,
            src.as_f64x2(),
            k,
        ))
    }
5186}
5187
5188/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5189/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5190/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5191/// Lower 2 bits of IMM8 specifies the operation control:
5192/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5193/// Upper 2 bits of IMM8 specifies the sign control:
5194/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5195///
5196/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_pd&ig_expand=5191)
5197#[inline]
5198#[target_feature(enable = "avx512dq,avx512vl")]
5199#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5200#[rustc_legacy_const_generics(3)]
5201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5202pub fn _mm_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
5203 static_assert_uimm_bits!(IMM8, 4);
    _mm_mask_range_pd::<IMM8>(_mm_setzero_pd(), k, a, b)
5205}
5206
5207/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5208/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
5209/// Lower 2 bits of IMM8 specifies the operation control:
5210/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5211/// Upper 2 bits of IMM8 specifies the sign control:
5212/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5213///
5214/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_range_pd&ig_expand=5195)
5215#[inline]
5216#[target_feature(enable = "avx512dq,avx512vl")]
5217#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5218#[rustc_legacy_const_generics(2)]
5219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5220pub fn _mm256_range_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
5221 static_assert_uimm_bits!(IMM8, 4);
    _mm256_mask_range_pd::<IMM8>(_mm256_setzero_pd(), 0xff, a, b)
5223}
5224
5225/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5226/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5227/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5228/// Lower 2 bits of IMM8 specifies the operation control:
5229/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5230/// Upper 2 bits of IMM8 specifies the sign control:
5231/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5232///
5233/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_range_pd&ig_expand=5193)
5234#[inline]
5235#[target_feature(enable = "avx512dq,avx512vl")]
5236#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5237#[rustc_legacy_const_generics(4)]
5238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5239pub fn _mm256_mask_range_pd<const IMM8: i32>(
5240 src: __m256d,
5241 k: __mmask8,
5242 a: __m256d,
5243 b: __m256d,
5244) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangepd_256(
            a.as_f64x4(),
            b.as_f64x4(),
            IMM8,
            src.as_f64x4(),
            k,
        ))
    }
5255}
5256
5257/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5258/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5259/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5260/// Lower 2 bits of IMM8 specifies the operation control:
5261/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5262/// Upper 2 bits of IMM8 specifies the sign control:
5263/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5264///
5265/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_range_pd&ig_expand=5194)
5266#[inline]
5267#[target_feature(enable = "avx512dq,avx512vl")]
5268#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5269#[rustc_legacy_const_generics(3)]
5270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5271pub fn _mm256_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
5272 static_assert_uimm_bits!(IMM8, 4);
    _mm256_mask_range_pd::<IMM8>(_mm256_setzero_pd(), k, a, b)
5274}
5275
5276/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5277/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
5278/// Lower 2 bits of IMM8 specifies the operation control:
5279/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5280/// Upper 2 bits of IMM8 specifies the sign control:
5281/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5282///
5283/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_pd&ig_expand=5198)
5284#[inline]
5285#[target_feature(enable = "avx512dq")]
5286#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5287#[rustc_legacy_const_generics(2)]
5288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5289pub fn _mm512_range_pd<const IMM8: i32>(a: __m512d, b: __m512d) -> __m512d {
5290 static_assert_uimm_bits!(IMM8, 4);
    _mm512_mask_range_pd::<IMM8>(_mm512_setzero_pd(), 0xff, a, b)
5292}
5293
5294/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5295/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5296/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5297/// Lower 2 bits of IMM8 specifies the operation control:
5298/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5299/// Upper 2 bits of IMM8 specifies the sign control:
5300/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5301///
5302/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_pd&ig_expand=5196)
5303#[inline]
5304#[target_feature(enable = "avx512dq")]
5305#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5306#[rustc_legacy_const_generics(4)]
5307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5308pub fn _mm512_mask_range_pd<const IMM8: i32>(
5309 src: __m512d,
5310 k: __mmask8,
5311 a: __m512d,
5312 b: __m512d,
5313) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangepd_512(
            a.as_f64x8(),
            b.as_f64x8(),
            IMM8,
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
5325}
5326
5327/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5328/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5329/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5330/// Lower 2 bits of IMM8 specifies the operation control:
5331/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5332/// Upper 2 bits of IMM8 specifies the sign control:
5333/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5334///
5335/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_pd&ig_expand=5197)
5336#[inline]
5337#[target_feature(enable = "avx512dq")]
5338#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5339#[rustc_legacy_const_generics(3)]
5340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5341pub fn _mm512_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
5342 static_assert_uimm_bits!(IMM8, 4);
    _mm512_mask_range_pd::<IMM8>(_mm512_setzero_pd(), k, a, b)
5344}
5345
5346/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5347/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
5348/// Lower 2 bits of IMM8 specifies the operation control:
5349/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5350/// Upper 2 bits of IMM8 specifies the sign control:
5351/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5352/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5353///
5354/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_ps&ig_expand=5213)
5355#[inline]
5356#[target_feature(enable = "avx512dq")]
5357#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
5358#[rustc_legacy_const_generics(2, 3)]
5359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5360pub fn _mm512_range_round_ps<const IMM8: i32, const SAE: i32>(a: __m512, b: __m512) -> __m512 {
5361 static_assert_uimm_bits!(IMM8, 4);
5362 static_assert_sae!(SAE);
    _mm512_mask_range_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), 0xffff, a, b)
5364}
5365
5366/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5367/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5368/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5369/// Lower 2 bits of IMM8 specifies the operation control:
5370/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5371/// Upper 2 bits of IMM8 specifies the sign control:
5372/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5373///
5374/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_round_ps&ig_expand=5211)
5375#[inline]
5376#[target_feature(enable = "avx512dq")]
5377#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
5378#[rustc_legacy_const_generics(4, 5)]
5379#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5380pub fn _mm512_mask_range_round_ps<const IMM8: i32, const SAE: i32>(
5381 src: __m512,
5382 k: __mmask16,
5383 a: __m512,
5384 b: __m512,
5385) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        static_assert_sae!(SAE);
        transmute(vrangeps_512(
            a.as_f32x16(),
            b.as_f32x16(),
            IMM8,
            src.as_f32x16(),
            k,
            SAE,
        ))
    }
5398}
5399
5400/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5401/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5402/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5403/// Lower 2 bits of IMM8 specifies the operation control:
5404/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5405/// Upper 2 bits of IMM8 specifies the sign control:
5406/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5407///
5408/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_round_ps&ig_expand=5212)
5409#[inline]
5410#[target_feature(enable = "avx512dq")]
5411#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
5412#[rustc_legacy_const_generics(3, 4)]
5413#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5414pub fn _mm512_maskz_range_round_ps<const IMM8: i32, const SAE: i32>(
5415 k: __mmask16,
5416 a: __m512,
5417 b: __m512,
5418) -> __m512 {
5419 static_assert_uimm_bits!(IMM8, 4);
5420 static_assert_sae!(SAE);
    _mm512_mask_range_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), k, a, b)
5422}
5423
5424/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5425/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
5426/// Lower 2 bits of IMM8 specifies the operation control:
5427/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5428/// Upper 2 bits of IMM8 specifies the sign control:
5429/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5430///
5431/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_ps&ig_expand=5201)
5432#[inline]
5433#[target_feature(enable = "avx512dq,avx512vl")]
5434#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5435#[rustc_legacy_const_generics(2)]
5436#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5437pub fn _mm_range_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
5438 static_assert_uimm_bits!(IMM8, 4);
    _mm_mask_range_ps::<IMM8>(_mm_setzero_ps(), 0xff, a, b)
5440}
5441
5442/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5443/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5444/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5445/// Lower 2 bits of IMM8 specifies the operation control:
5446/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5447/// Upper 2 bits of IMM8 specifies the sign control:
5448/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5449///
5450/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ps&ig_expand=5199)
5451#[inline]
5452#[target_feature(enable = "avx512dq,avx512vl")]
5453#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5454#[rustc_legacy_const_generics(4)]
5455#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5456pub fn _mm_mask_range_ps<const IMM8: i32>(
5457 src: __m128,
5458 k: __mmask8,
5459 a: __m128,
5460 b: __m128,
5461) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangeps_128(
            a.as_f32x4(),
            b.as_f32x4(),
            IMM8,
            src.as_f32x4(),
            k,
        ))
    }
5472}
5473
5474/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5475/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5476/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5477/// Lower 2 bits of IMM8 specifies the operation control:
5478/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5479/// Upper 2 bits of IMM8 specifies the sign control:
5480/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5481///
5482/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ps&ig_expand=5200)
5483#[inline]
5484#[target_feature(enable = "avx512dq,avx512vl")]
5485#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5486#[rustc_legacy_const_generics(3)]
5487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5488pub fn _mm_maskz_range_ps<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
5489 static_assert_uimm_bits!(IMM8, 4);
    _mm_mask_range_ps::<IMM8>(_mm_setzero_ps(), k, a, b)
5491}
5492
5493/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5494/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
5495/// Lower 2 bits of IMM8 specifies the operation control:
5496/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5497/// Upper 2 bits of IMM8 specifies the sign control:
5498/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5499///
5500/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_range_ps&ig_expand=5204)
5501#[inline]
5502#[target_feature(enable = "avx512dq,avx512vl")]
5503#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5504#[rustc_legacy_const_generics(2)]
5505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5506pub fn _mm256_range_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
5507 static_assert_uimm_bits!(IMM8, 4);
    _mm256_mask_range_ps::<IMM8>(_mm256_setzero_ps(), 0xff, a, b)
5509}
5510
5511/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5512/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5513/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5514/// Lower 2 bits of IMM8 specifies the operation control:
5515/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5516/// Upper 2 bits of IMM8 specifies the sign control:
5517/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5518///
5519/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_range_ps&ig_expand=5202)
5520#[inline]
5521#[target_feature(enable = "avx512dq,avx512vl")]
5522#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5523#[rustc_legacy_const_generics(4)]
5524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5525pub fn _mm256_mask_range_ps<const IMM8: i32>(
5526 src: __m256,
5527 k: __mmask8,
5528 a: __m256,
5529 b: __m256,
5530) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangeps_256(
            a.as_f32x8(),
            b.as_f32x8(),
            IMM8,
            src.as_f32x8(),
            k,
        ))
    }
5541}
5542
5543/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5544/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5545/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5546/// Lower 2 bits of IMM8 specifies the operation control:
5547/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5548/// Upper 2 bits of IMM8 specifies the sign control:
5549/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5550///
5551/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_range_ps&ig_expand=5203)
5552#[inline]
5553#[target_feature(enable = "avx512dq,avx512vl")]
5554#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5555#[rustc_legacy_const_generics(3)]
5556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5557pub fn _mm256_maskz_range_ps<const IMM8: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
5558 static_assert_uimm_bits!(IMM8, 4);
    _mm256_mask_range_ps::<IMM8>(_mm256_setzero_ps(), k, a, b)
5560}
5561
5562/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5563/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
5564/// Lower 2 bits of IMM8 specifies the operation control:
5565/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5566/// Upper 2 bits of IMM8 specifies the sign control:
5567/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5568///
5569/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_ps&ig_expand=5207)
5570#[inline]
5571#[target_feature(enable = "avx512dq")]
5572#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5573#[rustc_legacy_const_generics(2)]
5574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5575pub fn _mm512_range_ps<const IMM8: i32>(a: __m512, b: __m512) -> __m512 {
5576 static_assert_uimm_bits!(IMM8, 4);
    _mm512_mask_range_ps::<IMM8>(_mm512_setzero_ps(), 0xffff, a, b)
5578}
5579
5580/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5581/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5582/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5583/// Lower 2 bits of IMM8 specifies the operation control:
5584/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5585/// Upper 2 bits of IMM8 specifies the sign control:
5586/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5587///
5588/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_ps&ig_expand=5205)
5589#[inline]
5590#[target_feature(enable = "avx512dq")]
5591#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5592#[rustc_legacy_const_generics(4)]
5593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5594pub fn _mm512_mask_range_ps<const IMM8: i32>(
5595 src: __m512,
5596 k: __mmask16,
5597 a: __m512,
5598 b: __m512,
5599) -> __m512 {
5600 unsafe {
5601 static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangeps_512(
5603 a.as_f32x16(),
5604 b.as_f32x16(),
5605 IMM8,
5606 src.as_f32x16(),
5607 k,
5608 _MM_FROUND_CUR_DIRECTION,
5609 ))
5610 }
5611}
5612
5613/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5614/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5615/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5616/// Lower 2 bits of IMM8 specifies the operation control:
5617/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5618/// Upper 2 bits of IMM8 specifies the sign control:
5619/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5620///
5621/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_ps&ig_expand=5206)
5622#[inline]
5623#[target_feature(enable = "avx512dq")]
5624#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5625#[rustc_legacy_const_generics(3)]
5626#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5627pub fn _mm512_maskz_range_ps<const IMM8: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
5628 static_assert_uimm_bits!(IMM8, 4);
    _mm512_mask_range_ps::<IMM8>(_mm512_setzero_ps(), k, a, b)
5630}
5631
5632/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5633/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5634/// of dst, and copy the upper element from a to the upper element of dst.
5635/// Lower 2 bits of IMM8 specifies the operation control:
5636/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5637/// Upper 2 bits of IMM8 specifies the sign control:
5638/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5639/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5640///
5641/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_round_sd&ig_expand=5216)
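///
/// A minimal usage sketch (not from Intel's documentation), assuming runtime `avx512dq` support;
/// IMM8 = 0b0101 selects the maximum of the lower elements and `_MM_FROUND_NO_EXC` suppresses
/// floating-point exceptions:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512dq") {
///     // SAFETY: the required target feature was verified at runtime.
///     unsafe {
///         let a = _mm_set_pd(10.0, 2.0); // upper = 10.0, lower = 2.0
///         let b = _mm_set_pd(99.0, 3.0); // upper ignored, lower = 3.0
///         let r = _mm_range_round_sd::<0b0101, { _MM_FROUND_NO_EXC }>(a, b);
///         let mut out = [0.0f64; 2];
///         _mm_storeu_pd(out.as_mut_ptr(), r);
///         // lower lane: max(2.0, 3.0); upper lane copied from `a`
///         assert_eq!(out, [3.0, 10.0]);
///     }
/// }
/// ```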
5642#[inline]
5643#[target_feature(enable = "avx512dq")]
5644#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
5645#[rustc_legacy_const_generics(2, 3)]
5646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5647pub fn _mm_range_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
5648 static_assert_uimm_bits!(IMM8, 4);
5649 static_assert_sae!(SAE);
    _mm_mask_range_round_sd::<IMM8, SAE>(_mm_setzero_pd(), 0xff, a, b)
5651}
5652
5653/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5654/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5655/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5656/// upper element from a to the upper element of dst.
5657/// Lower 2 bits of IMM8 specifies the operation control:
5658/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5659/// Upper 2 bits of IMM8 specifies the sign control:
5660/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5661/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5662///
5663/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_sd&ig_expand=5214)
5664#[inline]
5665#[target_feature(enable = "avx512dq")]
5666#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
5667#[rustc_legacy_const_generics(4, 5)]
5668#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5669pub fn _mm_mask_range_round_sd<const IMM8: i32, const SAE: i32>(
5670 src: __m128d,
5671 k: __mmask8,
5672 a: __m128d,
5673 b: __m128d,
5674) -> __m128d {
5675 unsafe {
5676 static_assert_uimm_bits!(IMM8, 4);
5677 static_assert_sae!(SAE);
        transmute(vrangesd(
5679 a.as_f64x2(),
5680 b.as_f64x2(),
5681 src.as_f64x2(),
5682 k,
5683 IMM8,
5684 SAE,
5685 ))
5686 }
5687}
5688
5689/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5690/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5691/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5692/// element from a to the upper element of dst.
5693/// Lower 2 bits of IMM8 specifies the operation control:
5694/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5695/// Upper 2 bits of IMM8 specifies the sign control:
5696/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5697/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5698///
5699/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_sd&ig_expand=5215)
5700#[inline]
5701#[target_feature(enable = "avx512dq")]
5702#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
5703#[rustc_legacy_const_generics(3, 4)]
5704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5705pub fn _mm_maskz_range_round_sd<const IMM8: i32, const SAE: i32>(
5706 k: __mmask8,
5707 a: __m128d,
5708 b: __m128d,
5709) -> __m128d {
5710 static_assert_uimm_bits!(IMM8, 4);
5711 static_assert_sae!(SAE);
    _mm_mask_range_round_sd::<IMM8, SAE>(_mm_setzero_pd(), k, a, b)
5713}
5714
5715/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5716/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5717/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5718/// upper element from a to the upper element of dst.
5719/// Lower 2 bits of IMM8 specifies the operation control:
5720/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5721/// Upper 2 bits of IMM8 specifies the sign control:
5722/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5723///
5724/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_sd&ig_expand=5220)
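///
/// A minimal usage sketch (not from Intel's documentation), assuming runtime `avx512dq` support;
/// with mask bit 0 clear the lower lane is taken from `src` instead of being computed:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512dq") {
///     // SAFETY: the required target feature was verified at runtime.
///     unsafe {
///         let src = _mm_set_pd(0.0, 42.0);
///         let a = _mm_set_pd(7.0, 1.0);
///         let b = _mm_set_pd(9.0, 5.0);
///         // Mask bit 0 is not set, so the lower lane comes from `src`;
///         // the upper lane is always copied from `a`.
///         let r = _mm_mask_range_sd::<0b0101>(src, 0, a, b);
///         let mut out = [0.0f64; 2];
///         _mm_storeu_pd(out.as_mut_ptr(), r);
///         assert_eq!(out, [42.0, 7.0]);
///     }
/// }
/// ```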
5725#[inline]
5726#[target_feature(enable = "avx512dq")]
5727#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))]
5728#[rustc_legacy_const_generics(4)]
5729#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5730pub fn _mm_mask_range_sd<const IMM8: i32>(
5731 src: __m128d,
5732 k: __mmask8,
5733 a: __m128d,
5734 b: __m128d,
5735) -> __m128d {
5736 unsafe {
5737 static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangesd(
5739 a.as_f64x2(),
5740 b.as_f64x2(),
5741 src.as_f64x2(),
5742 k,
5743 IMM8,
5744 _MM_FROUND_CUR_DIRECTION,
5745 ))
5746 }
5747}
5748
5749/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5750/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5751/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5752/// element from a to the upper element of dst.
5753/// Lower 2 bits of IMM8 specifies the operation control:
5754/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5755/// Upper 2 bits of IMM8 specifies the sign control:
5756/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5757///
5758/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_sd&ig_expand=5221)
5759#[inline]
5760#[target_feature(enable = "avx512dq")]
5761#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))]
5762#[rustc_legacy_const_generics(3)]
5763#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5764pub fn _mm_maskz_range_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
5765 static_assert_uimm_bits!(IMM8, 4);
    _mm_mask_range_sd::<IMM8>(_mm_setzero_pd(), k, a, b)
5767}
5768
5769/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5770/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5771/// of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
5772/// Lower 2 bits of IMM8 specifies the operation control:
5773/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5774/// Upper 2 bits of IMM8 specifies the sign control:
5775/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5776/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5777///
5778/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_round_ss&ig_expand=5219)
5779#[inline]
5780#[target_feature(enable = "avx512dq")]
5781#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
5782#[rustc_legacy_const_generics(2, 3)]
5783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5784pub fn _mm_range_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
5785 static_assert_uimm_bits!(IMM8, 4);
5786 static_assert_sae!(SAE);
    _mm_mask_range_round_ss::<IMM8, SAE>(_mm_setzero_ps(), 0xff, a, b)
5788}
5789
5790/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5791/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5792/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5793/// upper 3 packed elements from a to the upper elements of dst.
5794/// Lower 2 bits of IMM8 specifies the operation control:
5795/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5796/// Upper 2 bits of IMM8 specifies the sign control:
5797/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5798/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5799///
5800/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_ss&ig_expand=5217)
5801#[inline]
5802#[target_feature(enable = "avx512dq")]
5803#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
5804#[rustc_legacy_const_generics(4, 5)]
5805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5806pub fn _mm_mask_range_round_ss<const IMM8: i32, const SAE: i32>(
5807 src: __m128,
5808 k: __mmask8,
5809 a: __m128,
5810 b: __m128,
5811) -> __m128 {
5812 unsafe {
5813 static_assert_uimm_bits!(IMM8, 4);
5814 static_assert_sae!(SAE);
        transmute(vrangess(
5816 a.as_f32x4(),
5817 b.as_f32x4(),
5818 src.as_f32x4(),
5819 k,
5820 IMM8,
5821 SAE,
5822 ))
5823 }
5824}
5825
5826/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5827/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5828/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5829/// 3 packed elements from a to the upper elements of dst.
5830/// Lower 2 bits of IMM8 specifies the operation control:
5831/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5832/// Upper 2 bits of IMM8 specifies the sign control:
5833/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5834/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5835///
5836/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_ss&ig_expand=5218)
5837#[inline]
5838#[target_feature(enable = "avx512dq")]
5839#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
5840#[rustc_legacy_const_generics(3, 4)]
5841#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5842pub fn _mm_maskz_range_round_ss<const IMM8: i32, const SAE: i32>(
5843 k: __mmask8,
5844 a: __m128,
5845 b: __m128,
5846) -> __m128 {
5847 static_assert_uimm_bits!(IMM8, 4);
5848 static_assert_sae!(SAE);
    _mm_mask_range_round_ss::<IMM8, SAE>(_mm_setzero_ps(), k, a, b)
5850}
5851
5852/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5853/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5854/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5855/// upper 3 packed elements from a to the upper elements of dst.
5856/// Lower 2 bits of IMM8 specifies the operation control:
5857/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5858/// Upper 2 bits of IMM8 specifies the sign control:
5859/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5860///
5861/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ss&ig_expand=5222)
5862#[inline]
5863#[target_feature(enable = "avx512dq")]
5864#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))]
5865#[rustc_legacy_const_generics(4)]
5866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5867pub fn _mm_mask_range_ss<const IMM8: i32>(
5868 src: __m128,
5869 k: __mmask8,
5870 a: __m128,
5871 b: __m128,
5872) -> __m128 {
5873 unsafe {
5874 static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangess(
5876 a.as_f32x4(),
5877 b.as_f32x4(),
5878 src.as_f32x4(),
5879 k,
5880 IMM8,
5881 _MM_FROUND_CUR_DIRECTION,
5882 ))
5883 }
5884}
5885
5886/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5887/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5888/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5889/// 3 packed elements from a to the upper elements of dst.
5890/// Lower 2 bits of IMM8 specifies the operation control:
5891/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5892/// Upper 2 bits of IMM8 specifies the sign control:
5893/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5894///
5895/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ss&ig_expand=5223)
5896#[inline]
5897#[target_feature(enable = "avx512dq")]
5898#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))]
5899#[rustc_legacy_const_generics(3)]
5900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5901pub fn _mm_maskz_range_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
5902 static_assert_uimm_bits!(IMM8, 4);
    _mm_mask_range_ss::<IMM8>(_mm_setzero_ps(), k, a, b)
5904}
5905
5906// Reduce
5907
5908/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5909/// the number of bits specified by imm8, and store the results in dst.
5910/// Rounding is done according to the imm8 parameter, which can be one of:
5911///
5912/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5913/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5914/// * [`_MM_FROUND_TO_POS_INF`] : round up
5915/// * [`_MM_FROUND_TO_ZERO`] : truncate
5916/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5917///
5918/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5919///
5920/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_round_pd&ig_expand=5438)
5921#[inline]
5922#[target_feature(enable = "avx512dq")]
5923#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
5924#[rustc_legacy_const_generics(1, 2)]
5925#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5926pub fn _mm512_reduce_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
5927 static_assert_uimm_bits!(IMM8, 8);
5928 static_assert_sae!(SAE);
    _mm512_mask_reduce_round_pd::<IMM8, SAE>(_mm512_undefined_pd(), 0xff, a)
5930}
5931
5932/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5933/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
5934/// copied from src to dst if the corresponding mask bit is not set).
5935/// Rounding is done according to the imm8 parameter, which can be one of:
5936///
5937/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5938/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5939/// * [`_MM_FROUND_TO_POS_INF`] : round up
5940/// * [`_MM_FROUND_TO_ZERO`] : truncate
5941/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5942///
5943/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5944///
5945/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_round_pd&ig_expand=5436)
5946#[inline]
5947#[target_feature(enable = "avx512dq")]
5948#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
5949#[rustc_legacy_const_generics(3, 4)]
5950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5951pub fn _mm512_mask_reduce_round_pd<const IMM8: i32, const SAE: i32>(
5952 src: __m512d,
5953 k: __mmask8,
5954 a: __m512d,
5955) -> __m512d {
5956 unsafe {
5957 static_assert_uimm_bits!(IMM8, 8);
5958 static_assert_sae!(SAE);
        transmute(vreducepd_512(a.as_f64x8(), IMM8, src.as_f64x8(), k, SAE))
5960 }
5961}
5962
5963/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5964/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
5965/// zeroed out if the corresponding mask bit is not set).
5966/// Rounding is done according to the imm8 parameter, which can be one of:
5967///
5968/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5969/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5970/// * [`_MM_FROUND_TO_POS_INF`] : round up
5971/// * [`_MM_FROUND_TO_ZERO`] : truncate
5972/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5973///
5974/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5975///
5976/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_round_pd&ig_expand=5437)
5977#[inline]
5978#[target_feature(enable = "avx512dq")]
5979#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
5980#[rustc_legacy_const_generics(2, 3)]
5981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5982pub fn _mm512_maskz_reduce_round_pd<const IMM8: i32, const SAE: i32>(
5983 k: __mmask8,
5984 a: __m512d,
5985) -> __m512d {
5986 static_assert_uimm_bits!(IMM8, 8);
5987 static_assert_sae!(SAE);
    _mm512_mask_reduce_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), k, a)
5989}
5990
5991/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5992/// the number of bits specified by imm8, and store the results in dst.
5993/// Rounding is done according to the imm8 parameter, which can be one of:
5994///
5995/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5996/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5997/// * [`_MM_FROUND_TO_POS_INF`] : round up
5998/// * [`_MM_FROUND_TO_ZERO`] : truncate
5999/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6000///
6001/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_pd&ig_expand=5411)
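///
/// A minimal usage sketch (not from Intel's documentation), assuming runtime support for
/// `avx512dq` and `avx512vl`; with `_MM_FROUND_TO_ZERO` the reduced argument should be what
/// remains of each element after truncating it toward zero:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") {
///     // SAFETY: the required target features were verified at runtime.
///     unsafe {
///         let a = _mm_set_pd(-1.5, 2.75); // upper = -1.5, lower = 2.75
///         let r = _mm_reduce_pd::<{ _MM_FROUND_TO_ZERO }>(a);
///         let mut out = [0.0f64; 2];
///         _mm_storeu_pd(out.as_mut_ptr(), r);
///         // 2.75 - trunc(2.75) = 0.75; -1.5 - trunc(-1.5) = -0.5
///         assert_eq!(out, [0.75, -0.5]);
///     }
/// }
/// ```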
6002#[inline]
6003#[target_feature(enable = "avx512dq,avx512vl")]
6004#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6005#[rustc_legacy_const_generics(1)]
6006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6007pub fn _mm_reduce_pd<const IMM8: i32>(a: __m128d) -> __m128d {
6008 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_pd::<IMM8>(_mm_undefined_pd(), 0xff, a)
6010}
6011
6012/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6013/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6014/// copied from src to dst if the corresponding mask bit is not set).
6015/// Rounding is done according to the imm8 parameter, which can be one of:
6016///
6017/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6018/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6019/// * [`_MM_FROUND_TO_POS_INF`] : round up
6020/// * [`_MM_FROUND_TO_ZERO`] : truncate
6021/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6022///
6023/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_pd&ig_expand=5409)
6024#[inline]
6025#[target_feature(enable = "avx512dq,avx512vl")]
6026#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6027#[rustc_legacy_const_generics(3)]
6028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6029pub fn _mm_mask_reduce_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
6030 unsafe {
6031 static_assert_uimm_bits!(IMM8, 8);
        transmute(vreducepd_128(a.as_f64x2(), IMM8, src.as_f64x2(), k))
6033 }
6034}
6035
6036/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6037/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6038/// zeroed out if the corresponding mask bit is not set).
6039/// Rounding is done according to the imm8 parameter, which can be one of:
6040///
6041/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6042/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6043/// * [`_MM_FROUND_TO_POS_INF`] : round up
6044/// * [`_MM_FROUND_TO_ZERO`] : truncate
6045/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6046///
6047/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_pd&ig_expand=5410)
6048#[inline]
6049#[target_feature(enable = "avx512dq,avx512vl")]
6050#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6051#[rustc_legacy_const_generics(2)]
6052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6053pub fn _mm_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
6054 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_pd::<IMM8>(_mm_setzero_pd(), k, a)
6056}
6057
6058/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6059/// the number of bits specified by imm8, and store the results in dst.
6060/// Rounding is done according to the imm8 parameter, which can be one of:
6061///
6062/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6063/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6064/// * [`_MM_FROUND_TO_POS_INF`] : round up
6065/// * [`_MM_FROUND_TO_ZERO`] : truncate
6066/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6067///
6068/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_pd&ig_expand=5414)
6069#[inline]
6070#[target_feature(enable = "avx512dq,avx512vl")]
6071#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6072#[rustc_legacy_const_generics(1)]
6073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6074pub fn _mm256_reduce_pd<const IMM8: i32>(a: __m256d) -> __m256d {
6075 static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_reduce_pd::<IMM8>(_mm256_undefined_pd(), 0xff, a)
6077}
6078
6079/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6080/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6081/// copied from src to dst if the corresponding mask bit is not set).
6082/// Rounding is done according to the imm8 parameter, which can be one of:
6083///
6084/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6085/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6086/// * [`_MM_FROUND_TO_POS_INF`] : round up
6087/// * [`_MM_FROUND_TO_ZERO`] : truncate
6088/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6089///
6090/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_pd&ig_expand=5412)
6091#[inline]
6092#[target_feature(enable = "avx512dq,avx512vl")]
6093#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6094#[rustc_legacy_const_generics(3)]
6095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6096pub fn _mm256_mask_reduce_pd<const IMM8: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
6097 unsafe {
6098 static_assert_uimm_bits!(IMM8, 8);
        transmute(vreducepd_256(a.as_f64x4(), IMM8, src.as_f64x4(), k))
6100 }
6101}
6102
6103/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6104/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6105/// zeroed out if the corresponding mask bit is not set).
6106/// Rounding is done according to the imm8 parameter, which can be one of:
6107///
6108/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6109/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6110/// * [`_MM_FROUND_TO_POS_INF`] : round up
6111/// * [`_MM_FROUND_TO_ZERO`] : truncate
6112/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6113///
6114/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_reduce_pd&ig_expand=5413)
6115#[inline]
6116#[target_feature(enable = "avx512dq,avx512vl")]
6117#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6118#[rustc_legacy_const_generics(2)]
6119#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6120pub fn _mm256_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
6121 static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_reduce_pd::<IMM8>(_mm256_setzero_pd(), k, a)
6123}
6124
6125/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6126/// the number of bits specified by imm8, and store the results in dst.
6127/// Rounding is done according to the imm8 parameter, which can be one of:
6128///
6129/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6130/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6131/// * [`_MM_FROUND_TO_POS_INF`] : round up
6132/// * [`_MM_FROUND_TO_ZERO`] : truncate
6133/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6134///
6135/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_pd&ig_expand=5417)
6136#[inline]
6137#[target_feature(enable = "avx512dq")]
6138#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6139#[rustc_legacy_const_generics(1)]
6140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6141pub fn _mm512_reduce_pd<const IMM8: i32>(a: __m512d) -> __m512d {
6142 static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_reduce_pd::<IMM8>(_mm512_undefined_pd(), 0xff, a)
6144}
6145
6146/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6147/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6148/// copied from src to dst if the corresponding mask bit is not set).
6149/// Rounding is done according to the imm8 parameter, which can be one of:
6150///
6151/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6152/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6153/// * [`_MM_FROUND_TO_POS_INF`] : round up
6154/// * [`_MM_FROUND_TO_ZERO`] : truncate
6155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6156///
6157/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_pd&ig_expand=5415)
6158#[inline]
6159#[target_feature(enable = "avx512dq")]
6160#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6161#[rustc_legacy_const_generics(3)]
6162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6163pub fn _mm512_mask_reduce_pd<const IMM8: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
6164 unsafe {
6165 static_assert_uimm_bits!(IMM8, 8);
        transmute(vreducepd_512(
6167 a.as_f64x8(),
6168 IMM8,
6169 src.as_f64x8(),
6170 k,
6171 _MM_FROUND_CUR_DIRECTION,
6172 ))
6173 }
6174}
6175
6176/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6177/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6178/// zeroed out if the corresponding mask bit is not set).
6179/// Rounding is done according to the imm8 parameter, which can be one of:
6180///
6181/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6182/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6183/// * [`_MM_FROUND_TO_POS_INF`] : round up
6184/// * [`_MM_FROUND_TO_ZERO`] : truncate
6185/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6186///
6187/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_pd&ig_expand=5416)
6188#[inline]
6189#[target_feature(enable = "avx512dq")]
6190#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6191#[rustc_legacy_const_generics(2)]
6192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6193pub fn _mm512_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
6194 static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_reduce_pd::<IMM8>(_mm512_setzero_pd(), k, a)
6196}
6197
6198/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6199/// the number of bits specified by imm8, and store the results in dst.
6200/// Rounding is done according to the imm8 parameter, which can be one of:
6201///
6202/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6203/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6204/// * [`_MM_FROUND_TO_POS_INF`] : round up
6205/// * [`_MM_FROUND_TO_ZERO`] : truncate
6206/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6207///
6208/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6209///
6210/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_round_ps&ig_expand=5444)
6211#[inline]
6212#[target_feature(enable = "avx512dq")]
6213#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))]
6214#[rustc_legacy_const_generics(1, 2)]
6215#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6216pub fn _mm512_reduce_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
6217 static_assert_uimm_bits!(IMM8, 8);
6218 static_assert_sae!(SAE);
    _mm512_mask_reduce_round_ps::<IMM8, SAE>(_mm512_undefined_ps(), 0xffff, a)
6220}
6221
6222/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6223/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6224/// copied from src to dst if the corresponding mask bit is not set).
6225/// Rounding is done according to the imm8 parameter, which can be one of:
6226///
6227/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6228/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6229/// * [`_MM_FROUND_TO_POS_INF`] : round up
6230/// * [`_MM_FROUND_TO_ZERO`] : truncate
6231/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6232///
6233/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6234///
6235/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_round_ps&ig_expand=5442)
6236#[inline]
6237#[target_feature(enable = "avx512dq")]
6238#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))]
6239#[rustc_legacy_const_generics(3, 4)]
6240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6241pub fn _mm512_mask_reduce_round_ps<const IMM8: i32, const SAE: i32>(
6242 src: __m512,
6243 k: __mmask16,
6244 a: __m512,
6245) -> __m512 {
6246 unsafe {
6247 static_assert_uimm_bits!(IMM8, 8);
6248 static_assert_sae!(SAE);
        transmute(vreduceps_512(a.as_f32x16(), IMM8, src.as_f32x16(), k, SAE))
6250 }
6251}
6252
6253/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6254/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6255/// zeroed out if the corresponding mask bit is not set).
6256/// Rounding is done according to the imm8 parameter, which can be one of:
6257///
6258/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6259/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6260/// * [`_MM_FROUND_TO_POS_INF`] : round up
6261/// * [`_MM_FROUND_TO_ZERO`] : truncate
6262/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6263///
6264/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6265///
6266/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_round_ps&ig_expand=5443)
6267#[inline]
6268#[target_feature(enable = "avx512dq")]
6269#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))]
6270#[rustc_legacy_const_generics(2, 3)]
6271#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6272pub fn _mm512_maskz_reduce_round_ps<const IMM8: i32, const SAE: i32>(
6273 k: __mmask16,
6274 a: __m512,
6275) -> __m512 {
6276 static_assert_uimm_bits!(IMM8, 8);
6277 static_assert_sae!(SAE);
    _mm512_mask_reduce_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), k, a)
6279}
6280
6281/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6282/// the number of bits specified by imm8, and store the results in dst.
6283/// Rounding is done according to the imm8 parameter, which can be one of:
6284///
6285/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6286/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6287/// * [`_MM_FROUND_TO_POS_INF`] : round up
6288/// * [`_MM_FROUND_TO_ZERO`] : truncate
6289/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6290///
6291/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ps&ig_expand=5429)
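///
/// A minimal usage sketch (not from Intel's documentation), assuming runtime support for
/// `avx512dq` and `avx512vl`; with `_MM_FROUND_TO_NEG_INF` the reduced argument should be the
/// non-negative distance of each element above its floor:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") {
///     // SAFETY: the required target features were verified at runtime.
///     unsafe {
///         let a = _mm_setr_ps(1.25, -1.25, 2.5, -0.75);
///         let r = _mm_reduce_ps::<{ _MM_FROUND_TO_NEG_INF }>(a);
///         let mut out = [0.0f32; 4];
///         _mm_storeu_ps(out.as_mut_ptr(), r);
///         // e.g. -1.25 - floor(-1.25) = -1.25 - (-2.0) = 0.75
///         assert_eq!(out, [0.25, 0.75, 0.5, 0.25]);
///     }
/// }
/// ```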
6292#[inline]
6293#[target_feature(enable = "avx512dq,avx512vl")]
6294#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6295#[rustc_legacy_const_generics(1)]
6296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6297pub fn _mm_reduce_ps<const IMM8: i32>(a: __m128) -> __m128 {
6298 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_ps::<IMM8>(_mm_undefined_ps(), 0xff, a)
6300}
6301
6302/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6303/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6304/// copied from src to dst if the corresponding mask bit is not set).
6305/// Rounding is done according to the imm8 parameter, which can be one of:
6306///
6307/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6308/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6309/// * [`_MM_FROUND_TO_POS_INF`] : round up
6310/// * [`_MM_FROUND_TO_ZERO`] : truncate
6311/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6312///
6313/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ps&ig_expand=5427)
6314#[inline]
6315#[target_feature(enable = "avx512dq,avx512vl")]
6316#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6317#[rustc_legacy_const_generics(3)]
6318#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6319pub fn _mm_mask_reduce_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
6320 unsafe {
6321 static_assert_uimm_bits!(IMM8, 8);
        transmute(vreduceps_128(a.as_f32x4(), IMM8, src.as_f32x4(), k))
6323 }
6324}
6325
6326/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6327/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6328/// zeroed out if the corresponding mask bit is not set).
6329/// Rounding is done according to the imm8 parameter, which can be one of:
6330///
6331/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6332/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6333/// * [`_MM_FROUND_TO_POS_INF`] : round up
6334/// * [`_MM_FROUND_TO_ZERO`] : truncate
6335/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6336///
6337/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ps&ig_expand=5428)
6338#[inline]
6339#[target_feature(enable = "avx512dq,avx512vl")]
6340#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6341#[rustc_legacy_const_generics(2)]
6342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6343pub fn _mm_maskz_reduce_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
6344 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_ps::<IMM8>(_mm_setzero_ps(), k, a)
6346}
6347
6348/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6349/// the number of bits specified by imm8, and store the results in dst.
6350/// Rounding is done according to the imm8 parameter, which can be one of:
6351///
6352/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6353/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6354/// * [`_MM_FROUND_TO_POS_INF`] : round up
6355/// * [`_MM_FROUND_TO_ZERO`] : truncate
6356/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6357///
6358/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_ps&ig_expand=5432)
6359#[inline]
6360#[target_feature(enable = "avx512dq,avx512vl")]
6361#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6362#[rustc_legacy_const_generics(1)]
6363#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6364pub fn _mm256_reduce_ps<const IMM8: i32>(a: __m256) -> __m256 {
6365 static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_reduce_ps::<IMM8>(_mm256_undefined_ps(), 0xff, a)
6367}
6368
6369/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6370/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6371/// copied from src to dst if the corresponding mask bit is not set).
6372/// Rounding is done according to the imm8 parameter, which can be one of:
6373///
6374/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6375/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6376/// * [`_MM_FROUND_TO_POS_INF`] : round up
6377/// * [`_MM_FROUND_TO_ZERO`] : truncate
6378/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6379///
6380/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_ps&ig_expand=5430)
6381#[inline]
6382#[target_feature(enable = "avx512dq,avx512vl")]
6383#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6384#[rustc_legacy_const_generics(3)]
6385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6386pub fn _mm256_mask_reduce_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
6387 unsafe {
6388 static_assert_uimm_bits!(IMM8, 8);
        transmute(vreduceps_256(a.as_f32x8(), IMM8, src.as_f32x8(), k))
6390 }
6391}
6392
6393/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6394/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6395/// zeroed out if the corresponding mask bit is not set).
6396/// Rounding is done according to the imm8 parameter, which can be one of:
6397///
6398/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6399/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6400/// * [`_MM_FROUND_TO_POS_INF`] : round up
6401/// * [`_MM_FROUND_TO_ZERO`] : truncate
6402/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6403///
6404/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_reduce_ps&ig_expand=5431)
6405#[inline]
6406#[target_feature(enable = "avx512dq,avx512vl")]
6407#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6408#[rustc_legacy_const_generics(2)]
6409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6410pub fn _mm256_maskz_reduce_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
6411 static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_reduce_ps::<IMM8>(_mm256_setzero_ps(), k, a)
6413}
6414
6415/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6416/// the number of bits specified by imm8, and store the results in dst.
6417/// Rounding is done according to the imm8 parameter, which can be one of:
6418///
6419/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6420/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6421/// * [`_MM_FROUND_TO_POS_INF`] : round up
6422/// * [`_MM_FROUND_TO_ZERO`] : truncate
6423/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6424///
6425/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_ps&ig_expand=5435)
6426#[inline]
6427#[target_feature(enable = "avx512dq")]
6428#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6429#[rustc_legacy_const_generics(1)]
6430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6431pub fn _mm512_reduce_ps<const IMM8: i32>(a: __m512) -> __m512 {
6432 static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_reduce_ps::<IMM8>(_mm512_undefined_ps(), 0xffff, a)
6434}
6435
6436/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6437/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6438/// copied from src to dst if the corresponding mask bit is not set).
6439/// Rounding is done according to the imm8 parameter, which can be one of:
6440///
6441/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6442/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6443/// * [`_MM_FROUND_TO_POS_INF`] : round up
6444/// * [`_MM_FROUND_TO_ZERO`] : truncate
6445/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6446///
6447/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_ps&ig_expand=5433)
6448#[inline]
6449#[target_feature(enable = "avx512dq")]
6450#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6451#[rustc_legacy_const_generics(3)]
6452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6453pub fn _mm512_mask_reduce_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
6454 unsafe {
6455 static_assert_uimm_bits!(IMM8, 8);
        transmute(vreduceps_512(
6457 a.as_f32x16(),
6458 IMM8,
6459 src.as_f32x16(),
6460 k,
6461 _MM_FROUND_CUR_DIRECTION,
6462 ))
6463 }
6464}
6465
6466/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6467/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6468/// zeroed out if the corresponding mask bit is not set).
6469/// Rounding is done according to the imm8 parameter, which can be one of:
6470///
6471/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6472/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6473/// * [`_MM_FROUND_TO_POS_INF`] : round up
6474/// * [`_MM_FROUND_TO_ZERO`] : truncate
6475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6476///
6477/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_ps&ig_expand=5434)
6478#[inline]
6479#[target_feature(enable = "avx512dq")]
6480#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6481#[rustc_legacy_const_generics(2)]
6482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6483pub fn _mm512_maskz_reduce_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
6484 static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_reduce_ps::<IMM8>(_mm512_setzero_ps(), k, a)
6486}
6487
6488/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6489/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy
6490/// the upper element from a to the upper element of dst.
6491/// Rounding is done according to the imm8 parameter, which can be one of:
6492///
6493/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6494/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6495/// * [`_MM_FROUND_TO_POS_INF`] : round up
6496/// * [`_MM_FROUND_TO_ZERO`] : truncate
6497/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6498///
6499/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6500///
6501/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_sd&ig_expand=5447)
6502#[inline]
6503#[target_feature(enable = "avx512dq")]
6504#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))]
6505#[rustc_legacy_const_generics(2, 3)]
6506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6507pub fn _mm_reduce_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
6508 static_assert_uimm_bits!(IMM8, 8);
6509 static_assert_sae!(SAE);
    _mm_mask_reduce_round_sd::<IMM8, SAE>(_mm_undefined_pd(), 0xff, a, b)
6511}
6512
6513/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6514/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
6515/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a
6516/// to the upper element of dst.
6517/// Rounding is done according to the imm8 parameter, which can be one of:
6518///
6519/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6520/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6521/// * [`_MM_FROUND_TO_POS_INF`] : round up
6522/// * [`_MM_FROUND_TO_ZERO`] : truncate
6523/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6524///
6525/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6526///
6527/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_sd&ig_expand=5445)
6528#[inline]
6529#[target_feature(enable = "avx512dq")]
6530#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))]
6531#[rustc_legacy_const_generics(4, 5)]
6532#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6533pub fn _mm_mask_reduce_round_sd<const IMM8: i32, const SAE: i32>(
6534 src: __m128d,
6535 k: __mmask8,
6536 a: __m128d,
6537 b: __m128d,
6538) -> __m128d {
6539 unsafe {
6540 static_assert_uimm_bits!(IMM8, 8);
6541 static_assert_sae!(SAE);
        transmute(vreducesd(
6543 a.as_f64x2(),
6544 b.as_f64x2(),
6545 src.as_f64x2(),
6546 k,
6547 IMM8,
6548 SAE,
6549 ))
6550 }
6551}
6552
6553/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6554/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
6555/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a
6556/// to the upper element of dst.
6557/// Rounding is done according to the imm8 parameter, which can be one of:
6558///
6559/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6560/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6561/// * [`_MM_FROUND_TO_POS_INF`] : round up
6562/// * [`_MM_FROUND_TO_ZERO`] : truncate
6563/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6564///
6565/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6566///
6567/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_sd&ig_expand=5446)
6568#[inline]
6569#[target_feature(enable = "avx512dq")]
6570#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))]
6571#[rustc_legacy_const_generics(3, 4)]
6572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6573pub fn _mm_maskz_reduce_round_sd<const IMM8: i32, const SAE: i32>(
6574 k: __mmask8,
6575 a: __m128d,
6576 b: __m128d,
6577) -> __m128d {
6578 static_assert_uimm_bits!(IMM8, 8);
6579 static_assert_sae!(SAE);
    _mm_mask_reduce_round_sd::<IMM8, SAE>(_mm_setzero_pd(), k, a, b)
6581}
6582
6583/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6584/// by the number of bits specified by imm8, store the result in the lower element of dst using, and
6585/// copy the upper element from a.
6586/// to the upper element of dst.
6587/// Rounding is done according to the imm8 parameter, which can be one of:
6588///
6589/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6590/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6591/// * [`_MM_FROUND_TO_POS_INF`] : round up
6592/// * [`_MM_FROUND_TO_ZERO`] : truncate
6593/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6594///
6595/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_sd&ig_expand=5456)
6596#[inline]
6597#[target_feature(enable = "avx512dq")]
6598#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))]
6599#[rustc_legacy_const_generics(2)]
6600#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6601pub fn _mm_reduce_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
6602 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_sd::<IMM8>(_mm_undefined_pd(), 0xff, a, b)
6604}
6605
6606/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6607/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
6608/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a
6609/// to the upper element of dst.
6610/// Rounding is done according to the imm8 parameter, which can be one of:
6611///
6612/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6613/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6614/// * [`_MM_FROUND_TO_POS_INF`] : round up
6615/// * [`_MM_FROUND_TO_ZERO`] : truncate
6616/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6617///
6618/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_sd&ig_expand=5454)
6619#[inline]
6620#[target_feature(enable = "avx512dq")]
6621#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))]
6622#[rustc_legacy_const_generics(4)]
6623#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6624pub fn _mm_mask_reduce_sd<const IMM8: i32>(
6625 src: __m128d,
6626 k: __mmask8,
6627 a: __m128d,
6628 b: __m128d,
6629) -> __m128d {
6630 unsafe {
6631 static_assert_uimm_bits!(IMM8, 8);
        transmute(vreducesd(
6633 a.as_f64x2(),
6634 b.as_f64x2(),
6635 src.as_f64x2(),
6636 k,
6637 IMM8,
6638 _MM_FROUND_CUR_DIRECTION,
6639 ))
6640 }
6641}
6642
6643/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6644/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
6645/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a
6646/// to the upper element of dst.
6647/// Rounding is done according to the imm8 parameter, which can be one of:
6648///
6649/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6650/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6651/// * [`_MM_FROUND_TO_POS_INF`] : round up
6652/// * [`_MM_FROUND_TO_ZERO`] : truncate
6653/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6654///
6655/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_sd&ig_expand=5455)
6656#[inline]
6657#[target_feature(enable = "avx512dq")]
6658#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))]
6659#[rustc_legacy_const_generics(3)]
6660#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6661pub fn _mm_maskz_reduce_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6662 static_assert_uimm_bits!(IMM8, 8);
6663    _mm_mask_reduce_sd::<IMM8>(_mm_setzero_pd(), k, a, b)
6664}
6665
6666/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6667/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy
6668/// the upper element from a
6669/// to the upper element of dst.
6670/// Rounding is done according to the imm8 parameter, which can be one of:
6671///
6672/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6673/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6674/// * [`_MM_FROUND_TO_POS_INF`] : round up
6675/// * [`_MM_FROUND_TO_ZERO`] : truncate
6676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6677///
6678/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6679///
6680/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_ss&ig_expand=5453)
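///
/// A minimal usage sketch (illustrative values, not from the upstream docs;
/// assumes `avx512dq` is available to the caller). The first const parameter
/// selects the reduction and rounding behaviour, the second the exception
/// behaviour:
///
/// ```ignore
/// let a = _mm_set_ps(4.0, 3.0, 2.0, 9.0);
/// let b = _mm_set_ps(8.0, 7.0, 6.0, 1.75);
/// // Truncate with zero fraction bits and suppress exceptions: the lower lane
/// // becomes 1.75 - 1.0 = 0.75, the three upper lanes are copied from `a`.
/// let r = _mm_reduce_round_ss::<{ _MM_FROUND_TO_ZERO }, { _MM_FROUND_NO_EXC }>(a, b);
/// ```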
6681#[inline]
6682#[target_feature(enable = "avx512dq")]
6683#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))]
6684#[rustc_legacy_const_generics(2, 3)]
6685#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6686pub fn _mm_reduce_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
6687 static_assert_uimm_bits!(IMM8, 8);
6688 static_assert_sae!(SAE);
6689    _mm_mask_reduce_round_ss::<IMM8, SAE>(_mm_undefined_ps(), 0xff, a, b)
6690}
6691
6692/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6693/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
6694/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a
6695/// to the upper element of dst.
6696/// Rounding is done according to the imm8 parameter, which can be one of:
6697///
6698/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6699/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6700/// * [`_MM_FROUND_TO_POS_INF`] : round up
6701/// * [`_MM_FROUND_TO_ZERO`] : truncate
6702/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6703///
6704/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6705///
6706/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_ss&ig_expand=5451)
6707#[inline]
6708#[target_feature(enable = "avx512dq")]
6709#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))]
6710#[rustc_legacy_const_generics(4, 5)]
6711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6712pub fn _mm_mask_reduce_round_ss<const IMM8: i32, const SAE: i32>(
6713 src: __m128,
6714 k: __mmask8,
6715 a: __m128,
6716 b: __m128,
6717) -> __m128 {
6718 unsafe {
6719 static_assert_uimm_bits!(IMM8, 8);
6720 static_assert_sae!(SAE);
6721        transmute(vreducess(
6722 a.as_f32x4(),
6723 b.as_f32x4(),
6724 src.as_f32x4(),
6725 k,
6726 IMM8,
6727 SAE,
6728 ))
6729 }
6730}
6731
6732/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6733/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
6734/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a
6735/// to the upper element of dst.
6736/// Rounding is done according to the imm8 parameter, which can be one of:
6737///
6738/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6739/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6740/// * [`_MM_FROUND_TO_POS_INF`] : round up
6741/// * [`_MM_FROUND_TO_ZERO`] : truncate
6742/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6743///
6744/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6745///
6746/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_ss&ig_expand=5452)
6747#[inline]
6748#[target_feature(enable = "avx512dq")]
6749#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))]
6750#[rustc_legacy_const_generics(3, 4)]
6751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6752pub fn _mm_maskz_reduce_round_ss<const IMM8: i32, const SAE: i32>(
6753 k: __mmask8,
6754 a: __m128,
6755 b: __m128,
6756) -> __m128 {
6757 static_assert_uimm_bits!(IMM8, 8);
6758 static_assert_sae!(SAE);
6759    _mm_mask_reduce_round_ss::<IMM8, SAE>(_mm_setzero_ps(), k, a, b)
6760}
6761
6762/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6763/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy
6764/// the upper element from a
6765/// to the upper element of dst.
6766/// Rounding is done according to the imm8 parameter, which can be one of:
6767///
6768/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6769/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6770/// * [`_MM_FROUND_TO_POS_INF`] : round up
6771/// * [`_MM_FROUND_TO_ZERO`] : truncate
6772/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6773///
6774/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ss&ig_expand=5462)
6775#[inline]
6776#[target_feature(enable = "avx512dq")]
6777#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))]
6778#[rustc_legacy_const_generics(2)]
6779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6780pub fn _mm_reduce_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
6781 static_assert_uimm_bits!(IMM8, 8);
6782    _mm_mask_reduce_ss::<IMM8>(_mm_undefined_ps(), 0xff, a, b)
6783}
6784
6785/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6786/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
6787/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a
6788/// to the upper element of dst.
6789/// Rounding is done according to the imm8 parameter, which can be one of:
6790///
6791/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6792/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6793/// * [`_MM_FROUND_TO_POS_INF`] : round up
6794/// * [`_MM_FROUND_TO_ZERO`] : truncate
6795/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6796///
6797/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ss&ig_expand=5460)
6798#[inline]
6799#[target_feature(enable = "avx512dq")]
6800#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))]
6801#[rustc_legacy_const_generics(4)]
6802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6803pub fn _mm_mask_reduce_ss<const IMM8: i32>(
6804 src: __m128,
6805 k: __mmask8,
6806 a: __m128,
6807 b: __m128,
6808) -> __m128 {
6809 unsafe {
6810 static_assert_uimm_bits!(IMM8, 8);
6811        transmute(vreducess(
6812 a.as_f32x4(),
6813 b.as_f32x4(),
6814 src.as_f32x4(),
6815 k,
6816 IMM8,
6817 _MM_FROUND_CUR_DIRECTION,
6818 ))
6819 }
6820}
6821
6822/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6823/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
6824/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a
6825/// to the upper element of dst.
6826/// Rounding is done according to the imm8 parameter, which can be one of:
6827///
6828/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6829/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6830/// * [`_MM_FROUND_TO_POS_INF`] : round up
6831/// * [`_MM_FROUND_TO_ZERO`] : truncate
6832/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6833///
6834/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ss&ig_expand=5461)
6835#[inline]
6836#[target_feature(enable = "avx512dq")]
6837#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))]
6838#[rustc_legacy_const_generics(3)]
6839#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6840pub fn _mm_maskz_reduce_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
6841 static_assert_uimm_bits!(IMM8, 8);
6842    _mm_mask_reduce_ss::<IMM8>(_mm_setzero_ps(), k, a, b)
6843}
6844
6845// FP-Class
6846
6847/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6848/// by imm8, and store the results in mask vector k.
6849/// imm can be a combination of:
6850///
6851/// - 0x01 // QNaN
6852/// - 0x02 // Positive Zero
6853/// - 0x04 // Negative Zero
6854/// - 0x08 // Positive Infinity
6855/// - 0x10 // Negative Infinity
6856/// - 0x20 // Denormal
6857/// - 0x40 // Negative
6858/// - 0x80 // SNaN
6859///
6860/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_pd_mask&ig_expand=3493)
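///
/// The category bits may be OR-ed together. A minimal usage sketch
/// (illustrative, not from the upstream docs; assumes `avx512dq,avx512vl`):
///
/// ```ignore
/// let a = _mm_set_pd(f64::NAN, 1.0);
/// // 0x01 | 0x80 matches any NaN (quiet or signaling). Bit 0 of the result
/// // corresponds to the lower element (1.0), bit 1 to the upper element (NaN).
/// let k = _mm_fpclass_pd_mask::<{ 0x01 | 0x80 }>(a);
/// assert_eq!(k, 0b10);
/// ```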
6861#[inline]
6862#[target_feature(enable = "avx512dq,avx512vl")]
6863#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6864#[rustc_legacy_const_generics(1)]
6865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6866pub fn _mm_fpclass_pd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 {
6867 static_assert_uimm_bits!(IMM8, 8);
6868    _mm_mask_fpclass_pd_mask::<IMM8>(0xff, a)
6869}
6870
6871/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6872/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6873/// corresponding mask bit is not set).
6874/// imm can be a combination of:
6875///
6876/// - 0x01 // QNaN
6877/// - 0x02 // Positive Zero
6878/// - 0x04 // Negative Zero
6879/// - 0x08 // Positive Infinity
6880/// - 0x10 // Negative Infinity
6881/// - 0x20 // Denormal
6882/// - 0x40 // Negative
6883/// - 0x80 // SNaN
6884///
6885/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_pd_mask&ig_expand=3494)
6886#[inline]
6887#[target_feature(enable = "avx512dq,avx512vl")]
6888#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6889#[rustc_legacy_const_generics(2)]
6890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6891pub fn _mm_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __mmask8 {
6892 unsafe {
6893 static_assert_uimm_bits!(IMM8, 8);
6894        transmute(vfpclasspd_128(a.as_f64x2(), IMM8, k1))
6895 }
6896}
6897
6898/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6899/// by imm8, and store the results in mask vector k.
6900/// imm can be a combination of:
6901///
6902/// - 0x01 // QNaN
6903/// - 0x02 // Positive Zero
6904/// - 0x04 // Negative Zero
6905/// - 0x08 // Positive Infinity
6906/// - 0x10 // Negative Infinity
6907/// - 0x20 // Denormal
6908/// - 0x40 // Negative
6909/// - 0x80 // SNaN
6910///
6911/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fpclass_pd_mask&ig_expand=3495)
6912#[inline]
6913#[target_feature(enable = "avx512dq,avx512vl")]
6914#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6915#[rustc_legacy_const_generics(1)]
6916#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6917pub fn _mm256_fpclass_pd_mask<const IMM8: i32>(a: __m256d) -> __mmask8 {
6918 static_assert_uimm_bits!(IMM8, 8);
6919    _mm256_mask_fpclass_pd_mask::<IMM8>(0xff, a)
6920}
6921
6922/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6923/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6924/// corresponding mask bit is not set).
6925/// imm can be a combination of:
6926///
6927/// - 0x01 // QNaN
6928/// - 0x02 // Positive Zero
6929/// - 0x04 // Negative Zero
6930/// - 0x08 // Positive Infinity
6931/// - 0x10 // Negative Infinity
6932/// - 0x20 // Denormal
6933/// - 0x40 // Negative
6934/// - 0x80 // SNaN
6935///
6936/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fpclass_pd_mask&ig_expand=3496)
6937#[inline]
6938#[target_feature(enable = "avx512dq,avx512vl")]
6939#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6940#[rustc_legacy_const_generics(2)]
6941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6942pub fn _mm256_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d) -> __mmask8 {
6943 unsafe {
6944 static_assert_uimm_bits!(IMM8, 8);
6945        transmute(vfpclasspd_256(a.as_f64x4(), IMM8, k1))
6946 }
6947}
6948
6949/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6950/// by imm8, and store the results in mask vector k.
6951/// imm can be a combination of:
6952///
6953/// - 0x01 // QNaN
6954/// - 0x02 // Positive Zero
6955/// - 0x04 // Negative Zero
6956/// - 0x08 // Positive Infinity
6957/// - 0x10 // Negative Infinity
6958/// - 0x20 // Denormal
6959/// - 0x40 // Negative
6960/// - 0x80 // SNaN
6961///
6962/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fpclass_pd_mask&ig_expand=3497)
6963#[inline]
6964#[target_feature(enable = "avx512dq")]
6965#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6966#[rustc_legacy_const_generics(1)]
6967#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6968pub fn _mm512_fpclass_pd_mask<const IMM8: i32>(a: __m512d) -> __mmask8 {
6969 static_assert_uimm_bits!(IMM8, 8);
6970    _mm512_mask_fpclass_pd_mask::<IMM8>(0xff, a)
6971}
6972
6973/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6974/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6975/// corresponding mask bit is not set).
6976/// imm can be a combination of:
6977///
6978/// - 0x01 // QNaN
6979/// - 0x02 // Positive Zero
6980/// - 0x04 // Negative Zero
6981/// - 0x08 // Positive Infinity
6982/// - 0x10 // Negative Infinity
6983/// - 0x20 // Denormal
6984/// - 0x40 // Negative
6985/// - 0x80 // SNaN
6986///
6987/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fpclass_pd_mask&ig_expand=3498)
6988#[inline]
6989#[target_feature(enable = "avx512dq")]
6990#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6991#[rustc_legacy_const_generics(2)]
6992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6993pub fn _mm512_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d) -> __mmask8 {
6994 unsafe {
6995 static_assert_uimm_bits!(IMM8, 8);
6996        transmute(vfpclasspd_512(a.as_f64x8(), IMM8, k1))
6997 }
6998}
6999
7000/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
7001/// by imm8, and store the results in mask vector k.
7002/// imm can be a combination of:
7003///
7004/// - 0x01 // QNaN
7005/// - 0x02 // Positive Zero
7006/// - 0x04 // Negative Zero
7007/// - 0x08 // Positive Infinity
7008/// - 0x10 // Negative Infinity
7009/// - 0x20 // Denormal
7010/// - 0x40 // Negative
7011/// - 0x80 // SNaN
7012///
7013/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_ps_mask&ig_expand=3505)
7014#[inline]
7015#[target_feature(enable = "avx512dq,avx512vl")]
7016#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
7017#[rustc_legacy_const_generics(1)]
7018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7019pub fn _mm_fpclass_ps_mask<const IMM8: i32>(a: __m128) -> __mmask8 {
7020 static_assert_uimm_bits!(IMM8, 8);
7021    _mm_mask_fpclass_ps_mask::<IMM8>(0xff, a)
7022}
7023
7024/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
7025/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
7026/// corresponding mask bit is not set).
7027/// imm can be a combination of:
7028///
7029/// - 0x01 // QNaN
7030/// - 0x02 // Positive Zero
7031/// - 0x04 // Negative Zero
7032/// - 0x08 // Positive Infinity
7033/// - 0x10 // Negative Infinity
7034/// - 0x20 // Denormal
7035/// - 0x40 // Negative
7036/// - 0x80 // SNaN
7037///
7038/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ps_mask&ig_expand=3506)
7039#[inline]
7040#[target_feature(enable = "avx512dq,avx512vl")]
7041#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
7042#[rustc_legacy_const_generics(2)]
7043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7044pub fn _mm_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __mmask8 {
7045 unsafe {
7046 static_assert_uimm_bits!(IMM8, 8);
7047        transmute(vfpclassps_128(a.as_f32x4(), IMM8, k1))
7048 }
7049}
7050
7051/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
7052/// by imm8, and store the results in mask vector k.
7053/// imm can be a combination of:
7054///
7055/// - 0x01 // QNaN
7056/// - 0x02 // Positive Zero
7057/// - 0x04 // Negative Zero
7058/// - 0x08 // Positive Infinity
7059/// - 0x10 // Negative Infinity
7060/// - 0x20 // Denormal
7061/// - 0x40 // Negative
7062/// - 0x80 // SNaN
7063///
7064/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fpclass_ps_mask&ig_expand=3507)
7065#[inline]
7066#[target_feature(enable = "avx512dq,avx512vl")]
7067#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
7068#[rustc_legacy_const_generics(1)]
7069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7070pub fn _mm256_fpclass_ps_mask<const IMM8: i32>(a: __m256) -> __mmask8 {
7071 static_assert_uimm_bits!(IMM8, 8);
7072    _mm256_mask_fpclass_ps_mask::<IMM8>(0xff, a)
7073}
7074
7075/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
7076/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
7077/// corresponding mask bit is not set).
7078/// imm can be a combination of:
7079///
7080/// - 0x01 // QNaN
7081/// - 0x02 // Positive Zero
7082/// - 0x04 // Negative Zero
7083/// - 0x08 // Positive Infinity
7084/// - 0x10 // Negative Infinity
7085/// - 0x20 // Denormal
7086/// - 0x40 // Negative
7087/// - 0x80 // SNaN
7088///
7089/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fpclass_ps_mask&ig_expand=3508)
7090#[inline]
7091#[target_feature(enable = "avx512dq,avx512vl")]
7092#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
7093#[rustc_legacy_const_generics(2)]
7094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7095pub fn _mm256_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256) -> __mmask8 {
7096 unsafe {
7097 static_assert_uimm_bits!(IMM8, 8);
7098        transmute(vfpclassps_256(a.as_f32x8(), IMM8, k1))
7099 }
7100}
7101
7102/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
7103/// by imm8, and store the results in mask vector k.
7104/// imm can be a combination of:
7105///
7106/// - 0x01 // QNaN
7107/// - 0x02 // Positive Zero
7108/// - 0x04 // Negative Zero
7109/// - 0x08 // Positive Infinity
7110/// - 0x10 // Negative Infinity
7111/// - 0x20 // Denormal
7112/// - 0x40 // Negative
7113/// - 0x80 // SNaN
7114///
7115/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fpclass_ps_mask&ig_expand=3509)
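///
/// A minimal usage sketch (illustrative, not from the upstream docs; assumes
/// `avx512dq`): the returned `__mmask16` holds one bit per lane, so ordinary
/// integer operations can be used on it, e.g. counting matches:
///
/// ```ignore
/// let a = _mm512_set1_ps(0.0);
/// // Category 0x02 is positive zero, so every one of the 16 lanes matches.
/// let k = _mm512_fpclass_ps_mask::<0x02>(a);
/// assert_eq!(k.count_ones(), 16);
/// ```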
7116#[inline]
7117#[target_feature(enable = "avx512dq")]
7118#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
7119#[rustc_legacy_const_generics(1)]
7120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7121pub fn _mm512_fpclass_ps_mask<const IMM8: i32>(a: __m512) -> __mmask16 {
7122 static_assert_uimm_bits!(IMM8, 8);
7123    _mm512_mask_fpclass_ps_mask::<IMM8>(0xffff, a)
7124}
7125
7126/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
7127/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
7128/// corresponding mask bit is not set).
7129/// imm can be a combination of:
7130///
7131/// - 0x01 // QNaN
7132/// - 0x02 // Positive Zero
7133/// - 0x04 // Negative Zero
7134/// - 0x08 // Positive Infinity
7135/// - 0x10 // Negative Infinity
7136/// - 0x20 // Denormal
7137/// - 0x40 // Negative
7138/// - 0x80 // SNaN
7139///
7140/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fpclass_ps_mask&ig_expand=3510)
7141#[inline]
7142#[target_feature(enable = "avx512dq")]
7143#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
7144#[rustc_legacy_const_generics(2)]
7145#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7146pub fn _mm512_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512) -> __mmask16 {
7147 unsafe {
7148 static_assert_uimm_bits!(IMM8, 8);
7149        transmute(vfpclassps_512(a.as_f32x16(), IMM8, k1))
7150 }
7151}
7152
7153/// Test the lower double-precision (64-bit) floating-point element in a for special categories specified
7154/// by imm8, and store the results in mask vector k.
7155/// imm can be a combination of:
7156///
7157/// - 0x01 // QNaN
7158/// - 0x02 // Positive Zero
7159/// - 0x04 // Negative Zero
7160/// - 0x08 // Positive Infinity
7161/// - 0x10 // Negative Infinity
7162/// - 0x20 // Denormal
7163/// - 0x40 // Negative
7164/// - 0x80 // SNaN
7165///
7166/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_sd_mask&ig_expand=3511)
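///
/// A minimal usage sketch (illustrative, not from the upstream docs; assumes
/// `avx512dq`): only bit 0 of the returned mask is meaningful for the scalar
/// form:
///
/// ```ignore
/// let a = _mm_set_sd(f64::INFINITY);
/// // 0x08 | 0x10 matches positive or negative infinity.
/// let k = _mm_fpclass_sd_mask::<{ 0x08 | 0x10 }>(a);
/// assert_eq!(k & 1, 1);
/// ```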
7167#[inline]
7168#[target_feature(enable = "avx512dq")]
7169#[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))]
7170#[rustc_legacy_const_generics(1)]
7171#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7172pub fn _mm_fpclass_sd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 {
7173 static_assert_uimm_bits!(IMM8, 8);
7174    _mm_mask_fpclass_sd_mask::<IMM8>(0xff, a)
7175}
7176
7177/// Test the lower double-precision (64-bit) floating-point element in a for special categories specified
7178/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
7179/// corresponding mask bit is not set).
7180/// imm can be a combination of:
7181///
7182/// - 0x01 // QNaN
7183/// - 0x02 // Positive Zero
7184/// - 0x04 // Negative Zero
7185/// - 0x08 // Positive Infinity
7186/// - 0x10 // Negative Infinity
7187/// - 0x20 // Denormal
7188/// - 0x40 // Negative
7189/// - 0x80 // SNaN
7190///
7191/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_sd_mask&ig_expand=3512)
7192#[inline]
7193#[target_feature(enable = "avx512dq")]
7194#[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))]
7195#[rustc_legacy_const_generics(2)]
7196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7197pub fn _mm_mask_fpclass_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __mmask8 {
7198 unsafe {
7199 static_assert_uimm_bits!(IMM8, 8);
7200        vfpclasssd(a.as_f64x2(), IMM8, k1)
7201 }
7202}
7203
7204/// Test the lower single-precision (32-bit) floating-point element in a for special categories specified
7205/// by imm8, and store the results in mask vector k.
7206/// imm can be a combination of:
7207///
7208/// - 0x01 // QNaN
7209/// - 0x02 // Positive Zero
7210/// - 0x04 // Negative Zero
7211/// - 0x08 // Positive Infinity
7212/// - 0x10 // Negative Infinity
7213/// - 0x20 // Denormal
7214/// - 0x40 // Negative
7215/// - 0x80 // SNaN
7216///
7217/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_ss_mask&ig_expand=3515)
7218#[inline]
7219#[target_feature(enable = "avx512dq")]
7220#[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))]
7221#[rustc_legacy_const_generics(1)]
7222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7223pub fn _mm_fpclass_ss_mask<const IMM8: i32>(a: __m128) -> __mmask8 {
7224 static_assert_uimm_bits!(IMM8, 8);
7225    _mm_mask_fpclass_ss_mask::<IMM8>(0xff, a)
7226}
7227
7228/// Test the lower single-precision (32-bit) floating-point element in a for special categories specified
7229/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
7230/// corresponding mask bit is not set).
7231/// imm can be a combination of:
7232///
7233/// - 0x01 // QNaN
7234/// - 0x02 // Positive Zero
7235/// - 0x04 // Negative Zero
7236/// - 0x08 // Positive Infinity
7237/// - 0x10 // Negative Infinity
7238/// - 0x20 // Denormal
7239/// - 0x40 // Negative
7240/// - 0x80 // SNaN
7241///
7242/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ss_mask&ig_expand=3516)
7243#[inline]
7244#[target_feature(enable = "avx512dq")]
7245#[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))]
7246#[rustc_legacy_const_generics(2)]
7247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7248pub fn _mm_mask_fpclass_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __mmask8 {
7249 unsafe {
7250 static_assert_uimm_bits!(IMM8, 8);
7251        vfpclassss(a.as_f32x4(), IMM8, k1)
7252 }
7253}
7254
7255#[allow(improper_ctypes)]
7256unsafe extern "C" {
7257 #[link_name = "llvm.x86.avx512.sitofp.round.v2f64.v2i64"]
7258    unsafe fn vcvtqq2pd_128(a: i64x2, rounding: i32) -> f64x2;
7259 #[link_name = "llvm.x86.avx512.sitofp.round.v4f64.v4i64"]
7260    unsafe fn vcvtqq2pd_256(a: i64x4, rounding: i32) -> f64x4;
7261 #[link_name = "llvm.x86.avx512.sitofp.round.v8f64.v8i64"]
7262    unsafe fn vcvtqq2pd_512(a: i64x8, rounding: i32) -> f64x8;
7263
7264 #[link_name = "llvm.x86.avx512.mask.cvtqq2ps.128"]
7265    unsafe fn vcvtqq2ps_128(a: i64x2, src: f32x4, k: __mmask8) -> f32x4;
7266 #[link_name = "llvm.x86.avx512.sitofp.round.v4f32.v4i64"]
7267    unsafe fn vcvtqq2ps_256(a: i64x4, rounding: i32) -> f32x4;
7268 #[link_name = "llvm.x86.avx512.sitofp.round.v8f32.v8i64"]
7269    unsafe fn vcvtqq2ps_512(a: i64x8, rounding: i32) -> f32x8;
7270
7271 #[link_name = "llvm.x86.avx512.uitofp.round.v2f64.v2i64"]
7272    unsafe fn vcvtuqq2pd_128(a: u64x2, rounding: i32) -> f64x2;
7273 #[link_name = "llvm.x86.avx512.uitofp.round.v4f64.v4i64"]
7274    unsafe fn vcvtuqq2pd_256(a: u64x4, rounding: i32) -> f64x4;
7275 #[link_name = "llvm.x86.avx512.uitofp.round.v8f64.v8i64"]
7276    unsafe fn vcvtuqq2pd_512(a: u64x8, rounding: i32) -> f64x8;
7277
7278 #[link_name = "llvm.x86.avx512.mask.cvtuqq2ps.128"]
7279    unsafe fn vcvtuqq2ps_128(a: u64x2, src: f32x4, k: __mmask8) -> f32x4;
7280 #[link_name = "llvm.x86.avx512.uitofp.round.v4f32.v4i64"]
7281    unsafe fn vcvtuqq2ps_256(a: u64x4, rounding: i32) -> f32x4;
7282 #[link_name = "llvm.x86.avx512.uitofp.round.v8f32.v8i64"]
7283    unsafe fn vcvtuqq2ps_512(a: u64x8, rounding: i32) -> f32x8;
7284
7285 #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.128"]
7286    unsafe fn vcvtpd2qq_128(a: f64x2, src: i64x2, k: __mmask8) -> i64x2;
7287 #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.256"]
7288    unsafe fn vcvtpd2qq_256(a: f64x4, src: i64x4, k: __mmask8) -> i64x4;
7289 #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.512"]
7290    unsafe fn vcvtpd2qq_512(a: f64x8, src: i64x8, k: __mmask8, rounding: i32) -> i64x8;
7291
7292 #[link_name = "llvm.x86.avx512.mask.cvtps2qq.128"]
7293    unsafe fn vcvtps2qq_128(a: f32x4, src: i64x2, k: __mmask8) -> i64x2;
7294 #[link_name = "llvm.x86.avx512.mask.cvtps2qq.256"]
7295    unsafe fn vcvtps2qq_256(a: f32x4, src: i64x4, k: __mmask8) -> i64x4;
7296 #[link_name = "llvm.x86.avx512.mask.cvtps2qq.512"]
7297    unsafe fn vcvtps2qq_512(a: f32x8, src: i64x8, k: __mmask8, rounding: i32) -> i64x8;
7298
7299 #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.128"]
7300    unsafe fn vcvtpd2uqq_128(a: f64x2, src: u64x2, k: __mmask8) -> u64x2;
7301 #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.256"]
7302    unsafe fn vcvtpd2uqq_256(a: f64x4, src: u64x4, k: __mmask8) -> u64x4;
7303 #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.512"]
7304    unsafe fn vcvtpd2uqq_512(a: f64x8, src: u64x8, k: __mmask8, rounding: i32) -> u64x8;
7305
7306 #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.128"]
7307    unsafe fn vcvtps2uqq_128(a: f32x4, src: u64x2, k: __mmask8) -> u64x2;
7308 #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.256"]
7309    unsafe fn vcvtps2uqq_256(a: f32x4, src: u64x4, k: __mmask8) -> u64x4;
7310 #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.512"]
7311    unsafe fn vcvtps2uqq_512(a: f32x8, src: u64x8, k: __mmask8, rounding: i32) -> u64x8;
7312
7313 #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.128"]
7314    unsafe fn vcvttpd2qq_128(a: f64x2, src: i64x2, k: __mmask8) -> i64x2;
7315 #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.256"]
7316    unsafe fn vcvttpd2qq_256(a: f64x4, src: i64x4, k: __mmask8) -> i64x4;
7317 #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.512"]
7318    unsafe fn vcvttpd2qq_512(a: f64x8, src: i64x8, k: __mmask8, sae: i32) -> i64x8;
7319
7320 #[link_name = "llvm.x86.avx512.mask.cvttps2qq.128"]
7321    unsafe fn vcvttps2qq_128(a: f32x4, src: i64x2, k: __mmask8) -> i64x2;
7322 #[link_name = "llvm.x86.avx512.mask.cvttps2qq.256"]
7323    unsafe fn vcvttps2qq_256(a: f32x4, src: i64x4, k: __mmask8) -> i64x4;
7324 #[link_name = "llvm.x86.avx512.mask.cvttps2qq.512"]
7325    unsafe fn vcvttps2qq_512(a: f32x8, src: i64x8, k: __mmask8, sae: i32) -> i64x8;
7326
7327 #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.128"]
7328    unsafe fn vcvttpd2uqq_128(a: f64x2, src: u64x2, k: __mmask8) -> u64x2;
7329 #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.256"]
7330    unsafe fn vcvttpd2uqq_256(a: f64x4, src: u64x4, k: __mmask8) -> u64x4;
7331 #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.512"]
7332    unsafe fn vcvttpd2uqq_512(a: f64x8, src: u64x8, k: __mmask8, sae: i32) -> u64x8;
7333
7334 #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.128"]
7335    unsafe fn vcvttps2uqq_128(a: f32x4, src: u64x2, k: __mmask8) -> u64x2;
7336 #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.256"]
7337    unsafe fn vcvttps2uqq_256(a: f32x4, src: u64x4, k: __mmask8) -> u64x4;
7338 #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.512"]
7339    unsafe fn vcvttps2uqq_512(a: f32x8, src: u64x8, k: __mmask8, sae: i32) -> u64x8;
7340
7341 #[link_name = "llvm.x86.avx512.mask.range.pd.128"]
7342    unsafe fn vrangepd_128(a: f64x2, b: f64x2, imm8: i32, src: f64x2, k: __mmask8) -> f64x2;
7343 #[link_name = "llvm.x86.avx512.mask.range.pd.256"]
7344    unsafe fn vrangepd_256(a: f64x4, b: f64x4, imm8: i32, src: f64x4, k: __mmask8) -> f64x4;
7345 #[link_name = "llvm.x86.avx512.mask.range.pd.512"]
7346    unsafe fn vrangepd_512(a: f64x8, b: f64x8, imm8: i32, src: f64x8, k: __mmask8, sae: i32) -> f64x8;
7347
7348 #[link_name = "llvm.x86.avx512.mask.range.ps.128"]
7349    unsafe fn vrangeps_128(a: f32x4, b: f32x4, imm8: i32, src: f32x4, k: __mmask8) -> f32x4;
7350 #[link_name = "llvm.x86.avx512.mask.range.ps.256"]
7351    unsafe fn vrangeps_256(a: f32x8, b: f32x8, imm8: i32, src: f32x8, k: __mmask8) -> f32x8;
7352 #[link_name = "llvm.x86.avx512.mask.range.ps.512"]
7353    unsafe fn vrangeps_512(a: f32x16, b: f32x16, imm8: i32, src: f32x16, k: __mmask16, sae: i32)
7354 -> f32x16;
7355
7356 #[link_name = "llvm.x86.avx512.mask.range.sd"]
7357    unsafe fn vrangesd(a: f64x2, b: f64x2, src: f64x2, k: __mmask8, imm8: i32, sae: i32) -> f64x2;
7358 #[link_name = "llvm.x86.avx512.mask.range.ss"]
7359    unsafe fn vrangess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4;
7360
7361 #[link_name = "llvm.x86.avx512.mask.reduce.pd.128"]
7362    unsafe fn vreducepd_128(a: f64x2, imm8: i32, src: f64x2, k: __mmask8) -> f64x2;
7363 #[link_name = "llvm.x86.avx512.mask.reduce.pd.256"]
7364    unsafe fn vreducepd_256(a: f64x4, imm8: i32, src: f64x4, k: __mmask8) -> f64x4;
7365 #[link_name = "llvm.x86.avx512.mask.reduce.pd.512"]
7366    unsafe fn vreducepd_512(a: f64x8, imm8: i32, src: f64x8, k: __mmask8, sae: i32) -> f64x8;
7367
7368 #[link_name = "llvm.x86.avx512.mask.reduce.ps.128"]
7369    unsafe fn vreduceps_128(a: f32x4, imm8: i32, src: f32x4, k: __mmask8) -> f32x4;
7370 #[link_name = "llvm.x86.avx512.mask.reduce.ps.256"]
7371    unsafe fn vreduceps_256(a: f32x8, imm8: i32, src: f32x8, k: __mmask8) -> f32x8;
7372 #[link_name = "llvm.x86.avx512.mask.reduce.ps.512"]
7373    unsafe fn vreduceps_512(a: f32x16, imm8: i32, src: f32x16, k: __mmask16, sae: i32) -> f32x16;
7374
7375 #[link_name = "llvm.x86.avx512.mask.reduce.sd"]
7376    unsafe fn vreducesd(a: f64x2, b: f64x2, src: f64x2, k: __mmask8, imm8: i32, sae: i32) -> f64x2;
7377 #[link_name = "llvm.x86.avx512.mask.reduce.ss"]
7378    unsafe fn vreducess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4;
7379
7380 #[link_name = "llvm.x86.avx512.mask.fpclass.pd.128"]
7381    unsafe fn vfpclasspd_128(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8;
7382 #[link_name = "llvm.x86.avx512.mask.fpclass.pd.256"]
7383    unsafe fn vfpclasspd_256(a: f64x4, imm8: i32, k: __mmask8) -> __mmask8;
7384 #[link_name = "llvm.x86.avx512.mask.fpclass.pd.512"]
7385    unsafe fn vfpclasspd_512(a: f64x8, imm8: i32, k: __mmask8) -> __mmask8;
7386
7387 #[link_name = "llvm.x86.avx512.mask.fpclass.ps.128"]
7388    unsafe fn vfpclassps_128(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8;
7389 #[link_name = "llvm.x86.avx512.mask.fpclass.ps.256"]
7390    unsafe fn vfpclassps_256(a: f32x8, imm8: i32, k: __mmask8) -> __mmask8;
7391 #[link_name = "llvm.x86.avx512.mask.fpclass.ps.512"]
7392    unsafe fn vfpclassps_512(a: f32x16, imm8: i32, k: __mmask16) -> __mmask16;
7393
7394 #[link_name = "llvm.x86.avx512.mask.fpclass.sd"]
7395    unsafe fn vfpclasssd(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8;
7396 #[link_name = "llvm.x86.avx512.mask.fpclass.ss"]
7397    unsafe fn vfpclassss(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8;
7398}
7399
7400#[cfg(test)]
7401mod tests {
7402 use super::*;
7403 use crate::core_arch::assert_eq_const as assert_eq;
7404 use crate::core_arch::x86::*;
7405
7406 use stdarch_test::simd_test;
7407
7408 const OPRND1_64: f64 = f64::from_bits(0x3333333333333333);
7409 const OPRND2_64: f64 = f64::from_bits(0x5555555555555555);
7410
7411 const AND_64: f64 = f64::from_bits(0x1111111111111111);
7412 const ANDN_64: f64 = f64::from_bits(0x4444444444444444);
7413 const OR_64: f64 = f64::from_bits(0x7777777777777777);
7414 const XOR_64: f64 = f64::from_bits(0x6666666666666666);
7415
7416 const OPRND1_32: f32 = f32::from_bits(0x33333333);
7417 const OPRND2_32: f32 = f32::from_bits(0x55555555);
7418
7419 const AND_32: f32 = f32::from_bits(0x11111111);
7420 const ANDN_32: f32 = f32::from_bits(0x44444444);
7421 const OR_32: f32 = f32::from_bits(0x77777777);
7422 const XOR_32: f32 = f32::from_bits(0x66666666);
7423
7424 #[simd_test(enable = "avx512dq,avx512vl")]
7425 const fn test_mm_mask_and_pd() {
7426 let a = _mm_set1_pd(OPRND1_64);
7427 let b = _mm_set1_pd(OPRND2_64);
7428 let src = _mm_set_pd(1., 2.);
7429 let r = _mm_mask_and_pd(src, 0b01, a, b);
7430 let e = _mm_set_pd(1., AND_64);
7431 assert_eq_m128d(r, e);
7432 }
7433
7434 #[simd_test(enable = "avx512dq,avx512vl")]
7435 const fn test_mm_maskz_and_pd() {
7436 let a = _mm_set1_pd(OPRND1_64);
7437 let b = _mm_set1_pd(OPRND2_64);
7438 let r = _mm_maskz_and_pd(0b01, a, b);
7439 let e = _mm_set_pd(0.0, AND_64);
7440 assert_eq_m128d(r, e);
7441 }
7442
7443 #[simd_test(enable = "avx512dq,avx512vl")]
7444 const fn test_mm256_mask_and_pd() {
7445 let a = _mm256_set1_pd(OPRND1_64);
7446 let b = _mm256_set1_pd(OPRND2_64);
7447 let src = _mm256_set_pd(1., 2., 3., 4.);
7448 let r = _mm256_mask_and_pd(src, 0b0101, a, b);
7449 let e = _mm256_set_pd(1., AND_64, 3., AND_64);
7450 assert_eq_m256d(r, e);
7451 }
7452
7453 #[simd_test(enable = "avx512dq,avx512vl")]
7454 const fn test_mm256_maskz_and_pd() {
7455 let a = _mm256_set1_pd(OPRND1_64);
7456 let b = _mm256_set1_pd(OPRND2_64);
7457 let r = _mm256_maskz_and_pd(0b0101, a, b);
7458 let e = _mm256_set_pd(0.0, AND_64, 0.0, AND_64);
7459 assert_eq_m256d(r, e);
7460 }
7461
7462 #[simd_test(enable = "avx512dq")]
7463 const fn test_mm512_and_pd() {
7464 let a = _mm512_set1_pd(OPRND1_64);
7465 let b = _mm512_set1_pd(OPRND2_64);
7466 let r = _mm512_and_pd(a, b);
7467 let e = _mm512_set1_pd(AND_64);
7468 assert_eq_m512d(r, e);
7469 }
7470
7471 #[simd_test(enable = "avx512dq")]
7472 const fn test_mm512_mask_and_pd() {
7473 let a = _mm512_set1_pd(OPRND1_64);
7474 let b = _mm512_set1_pd(OPRND2_64);
7475 let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7476 let r = _mm512_mask_and_pd(src, 0b01010101, a, b);
7477 let e = _mm512_set_pd(1., AND_64, 3., AND_64, 5., AND_64, 7., AND_64);
7478 assert_eq_m512d(r, e);
7479 }
7480
7481 #[simd_test(enable = "avx512dq")]
7482 const fn test_mm512_maskz_and_pd() {
7483 let a = _mm512_set1_pd(OPRND1_64);
7484 let b = _mm512_set1_pd(OPRND2_64);
7485 let r = _mm512_maskz_and_pd(0b01010101, a, b);
7486 let e = _mm512_set_pd(0.0, AND_64, 0.0, AND_64, 0.0, AND_64, 0.0, AND_64);
7487 assert_eq_m512d(r, e);
7488 }
7489
7490 #[simd_test(enable = "avx512dq,avx512vl")]
7491 const fn test_mm_mask_and_ps() {
7492 let a = _mm_set1_ps(OPRND1_32);
7493 let b = _mm_set1_ps(OPRND2_32);
7494 let src = _mm_set_ps(1., 2., 3., 4.);
7495 let r = _mm_mask_and_ps(src, 0b0101, a, b);
7496 let e = _mm_set_ps(1., AND_32, 3., AND_32);
7497 assert_eq_m128(r, e);
7498 }
7499
7500 #[simd_test(enable = "avx512dq,avx512vl")]
7501 const fn test_mm_maskz_and_ps() {
7502 let a = _mm_set1_ps(OPRND1_32);
7503 let b = _mm_set1_ps(OPRND2_32);
7504 let r = _mm_maskz_and_ps(0b0101, a, b);
7505 let e = _mm_set_ps(0.0, AND_32, 0.0, AND_32);
7506 assert_eq_m128(r, e);
7507 }
7508
7509 #[simd_test(enable = "avx512dq,avx512vl")]
7510 const fn test_mm256_mask_and_ps() {
7511 let a = _mm256_set1_ps(OPRND1_32);
7512 let b = _mm256_set1_ps(OPRND2_32);
7513 let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7514 let r = _mm256_mask_and_ps(src, 0b01010101, a, b);
7515 let e = _mm256_set_ps(1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32);
7516 assert_eq_m256(r, e);
7517 }
7518
7519 #[simd_test(enable = "avx512dq,avx512vl")]
7520 const fn test_mm256_maskz_and_ps() {
7521 let a = _mm256_set1_ps(OPRND1_32);
7522 let b = _mm256_set1_ps(OPRND2_32);
7523 let r = _mm256_maskz_and_ps(0b01010101, a, b);
7524 let e = _mm256_set_ps(0.0, AND_32, 0.0, AND_32, 0.0, AND_32, 0.0, AND_32);
7525 assert_eq_m256(r, e);
7526 }
7527
7528 #[simd_test(enable = "avx512dq")]
7529 const fn test_mm512_and_ps() {
7530 let a = _mm512_set1_ps(OPRND1_32);
7531 let b = _mm512_set1_ps(OPRND2_32);
7532 let r = _mm512_and_ps(a, b);
7533 let e = _mm512_set1_ps(AND_32);
7534 assert_eq_m512(r, e);
7535 }
7536
7537 #[simd_test(enable = "avx512dq")]
7538 const fn test_mm512_mask_and_ps() {
7539 let a = _mm512_set1_ps(OPRND1_32);
7540 let b = _mm512_set1_ps(OPRND2_32);
7541 let src = _mm512_set_ps(
7542 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7543 );
7544 let r = _mm512_mask_and_ps(src, 0b0101010101010101, a, b);
7545 let e = _mm512_set_ps(
7546 1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32, 9., AND_32, 11., AND_32, 13., AND_32,
7547 15., AND_32,
7548 );
7549 assert_eq_m512(r, e);
7550 }
7551
7552 #[simd_test(enable = "avx512dq")]
7553 const fn test_mm512_maskz_and_ps() {
7554 let a = _mm512_set1_ps(OPRND1_32);
7555 let b = _mm512_set1_ps(OPRND2_32);
7556 let r = _mm512_maskz_and_ps(0b0101010101010101, a, b);
7557 let e = _mm512_set_ps(
7558 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0.,
7559 AND_32,
7560 );
7561 assert_eq_m512(r, e);
7562 }
7563
7564 #[simd_test(enable = "avx512dq,avx512vl")]
7565 const fn test_mm_mask_andnot_pd() {
7566 let a = _mm_set1_pd(OPRND1_64);
7567 let b = _mm_set1_pd(OPRND2_64);
7568 let src = _mm_set_pd(1., 2.);
7569 let r = _mm_mask_andnot_pd(src, 0b01, a, b);
7570 let e = _mm_set_pd(1., ANDN_64);
7571 assert_eq_m128d(r, e);
7572 }
7573
7574 #[simd_test(enable = "avx512dq,avx512vl")]
7575 const fn test_mm_maskz_andnot_pd() {
7576 let a = _mm_set1_pd(OPRND1_64);
7577 let b = _mm_set1_pd(OPRND2_64);
7578 let r = _mm_maskz_andnot_pd(0b01, a, b);
7579 let e = _mm_set_pd(0.0, ANDN_64);
7580 assert_eq_m128d(r, e);
7581 }
7582
7583 #[simd_test(enable = "avx512dq,avx512vl")]
7584 const fn test_mm256_mask_andnot_pd() {
7585 let a = _mm256_set1_pd(OPRND1_64);
7586 let b = _mm256_set1_pd(OPRND2_64);
7587 let src = _mm256_set_pd(1., 2., 3., 4.);
7588 let r = _mm256_mask_andnot_pd(src, 0b0101, a, b);
7589 let e = _mm256_set_pd(1., ANDN_64, 3., ANDN_64);
7590 assert_eq_m256d(r, e);
7591 }
7592
7593 #[simd_test(enable = "avx512dq,avx512vl")]
7594 const fn test_mm256_maskz_andnot_pd() {
7595 let a = _mm256_set1_pd(OPRND1_64);
7596 let b = _mm256_set1_pd(OPRND2_64);
7597 let r = _mm256_maskz_andnot_pd(0b0101, a, b);
7598 let e = _mm256_set_pd(0.0, ANDN_64, 0.0, ANDN_64);
7599 assert_eq_m256d(r, e);
7600 }
7601
7602 #[simd_test(enable = "avx512dq")]
7603 const fn test_mm512_andnot_pd() {
7604 let a = _mm512_set1_pd(OPRND1_64);
7605 let b = _mm512_set1_pd(OPRND2_64);
7606 let r = _mm512_andnot_pd(a, b);
7607 let e = _mm512_set1_pd(ANDN_64);
7608 assert_eq_m512d(r, e);
7609 }
7610
7611 #[simd_test(enable = "avx512dq")]
7612 const fn test_mm512_mask_andnot_pd() {
7613 let a = _mm512_set1_pd(OPRND1_64);
7614 let b = _mm512_set1_pd(OPRND2_64);
7615 let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7616 let r = _mm512_mask_andnot_pd(src, 0b01010101, a, b);
7617 let e = _mm512_set_pd(1., ANDN_64, 3., ANDN_64, 5., ANDN_64, 7., ANDN_64);
7618 assert_eq_m512d(r, e);
7619 }
7620
7621 #[simd_test(enable = "avx512dq")]
7622 const fn test_mm512_maskz_andnot_pd() {
7623 let a = _mm512_set1_pd(OPRND1_64);
7624 let b = _mm512_set1_pd(OPRND2_64);
7625 let r = _mm512_maskz_andnot_pd(0b01010101, a, b);
7626 let e = _mm512_set_pd(0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64);
7627 assert_eq_m512d(r, e);
7628 }
7629
7630 #[simd_test(enable = "avx512dq,avx512vl")]
7631 const fn test_mm_mask_andnot_ps() {
7632 let a = _mm_set1_ps(OPRND1_32);
7633 let b = _mm_set1_ps(OPRND2_32);
7634 let src = _mm_set_ps(1., 2., 3., 4.);
7635 let r = _mm_mask_andnot_ps(src, 0b0101, a, b);
7636 let e = _mm_set_ps(1., ANDN_32, 3., ANDN_32);
7637 assert_eq_m128(r, e);
7638 }
7639
7640 #[simd_test(enable = "avx512dq,avx512vl")]
7641 const fn test_mm_maskz_andnot_ps() {
7642 let a = _mm_set1_ps(OPRND1_32);
7643 let b = _mm_set1_ps(OPRND2_32);
7644 let r = _mm_maskz_andnot_ps(0b0101, a, b);
7645 let e = _mm_set_ps(0.0, ANDN_32, 0.0, ANDN_32);
7646 assert_eq_m128(r, e);
7647 }
7648
7649 #[simd_test(enable = "avx512dq,avx512vl")]
7650 const fn test_mm256_mask_andnot_ps() {
7651 let a = _mm256_set1_ps(OPRND1_32);
7652 let b = _mm256_set1_ps(OPRND2_32);
7653 let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7654 let r = _mm256_mask_andnot_ps(src, 0b01010101, a, b);
7655 let e = _mm256_set_ps(1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32);
7656 assert_eq_m256(r, e);
7657 }
7658
7659 #[simd_test(enable = "avx512dq,avx512vl")]
7660 const fn test_mm256_maskz_andnot_ps() {
7661 let a = _mm256_set1_ps(OPRND1_32);
7662 let b = _mm256_set1_ps(OPRND2_32);
7663 let r = _mm256_maskz_andnot_ps(0b01010101, a, b);
7664 let e = _mm256_set_ps(0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32);
7665 assert_eq_m256(r, e);
7666 }
7667
7668 #[simd_test(enable = "avx512dq")]
7669 const fn test_mm512_andnot_ps() {
7670 let a = _mm512_set1_ps(OPRND1_32);
7671 let b = _mm512_set1_ps(OPRND2_32);
7672 let r = _mm512_andnot_ps(a, b);
7673 let e = _mm512_set1_ps(ANDN_32);
7674 assert_eq_m512(r, e);
7675 }
7676
7677 #[simd_test(enable = "avx512dq")]
7678 const fn test_mm512_mask_andnot_ps() {
7679 let a = _mm512_set1_ps(OPRND1_32);
7680 let b = _mm512_set1_ps(OPRND2_32);
7681 let src = _mm512_set_ps(
7682 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7683 );
7684 let r = _mm512_mask_andnot_ps(src, 0b0101010101010101, a, b);
7685 let e = _mm512_set_ps(
7686 1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32, 9., ANDN_32, 11., ANDN_32, 13.,
7687 ANDN_32, 15., ANDN_32,
7688 );
7689 assert_eq_m512(r, e);
7690 }
7691
7692 #[simd_test(enable = "avx512dq")]
7693 const fn test_mm512_maskz_andnot_ps() {
7694 let a = _mm512_set1_ps(OPRND1_32);
7695 let b = _mm512_set1_ps(OPRND2_32);
7696 let r = _mm512_maskz_andnot_ps(0b0101010101010101, a, b);
7697 let e = _mm512_set_ps(
7698 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0.,
7699 ANDN_32, 0., ANDN_32,
7700 );
7701 assert_eq_m512(r, e);
7702 }
7703
7704 #[simd_test(enable = "avx512dq,avx512vl")]
7705 const fn test_mm_mask_or_pd() {
7706 let a = _mm_set1_pd(OPRND1_64);
7707 let b = _mm_set1_pd(OPRND2_64);
7708 let src = _mm_set_pd(1., 2.);
7709 let r = _mm_mask_or_pd(src, 0b01, a, b);
7710 let e = _mm_set_pd(1., OR_64);
7711 assert_eq_m128d(r, e);
7712 }
7713
7714 #[simd_test(enable = "avx512dq,avx512vl")]
7715 const fn test_mm_maskz_or_pd() {
7716 let a = _mm_set1_pd(OPRND1_64);
7717 let b = _mm_set1_pd(OPRND2_64);
7718 let r = _mm_maskz_or_pd(0b01, a, b);
7719 let e = _mm_set_pd(0.0, OR_64);
7720 assert_eq_m128d(r, e);
7721 }
7722
7723 #[simd_test(enable = "avx512dq,avx512vl")]
7724 const fn test_mm256_mask_or_pd() {
7725 let a = _mm256_set1_pd(OPRND1_64);
7726 let b = _mm256_set1_pd(OPRND2_64);
7727 let src = _mm256_set_pd(1., 2., 3., 4.);
7728 let r = _mm256_mask_or_pd(src, 0b0101, a, b);
7729 let e = _mm256_set_pd(1., OR_64, 3., OR_64);
7730 assert_eq_m256d(r, e);
7731 }
7732
7733 #[simd_test(enable = "avx512dq,avx512vl")]
7734 const fn test_mm256_maskz_or_pd() {
7735 let a = _mm256_set1_pd(OPRND1_64);
7736 let b = _mm256_set1_pd(OPRND2_64);
7737 let r = _mm256_maskz_or_pd(0b0101, a, b);
7738 let e = _mm256_set_pd(0.0, OR_64, 0.0, OR_64);
7739 assert_eq_m256d(r, e);
7740 }
7741
7742 #[simd_test(enable = "avx512dq")]
7743 const fn test_mm512_or_pd() {
7744 let a = _mm512_set1_pd(OPRND1_64);
7745 let b = _mm512_set1_pd(OPRND2_64);
7746 let r = _mm512_or_pd(a, b);
7747 let e = _mm512_set1_pd(OR_64);
7748 assert_eq_m512d(r, e);
7749 }
7750
7751 #[simd_test(enable = "avx512dq")]
7752 const fn test_mm512_mask_or_pd() {
7753 let a = _mm512_set1_pd(OPRND1_64);
7754 let b = _mm512_set1_pd(OPRND2_64);
7755 let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7756 let r = _mm512_mask_or_pd(src, 0b01010101, a, b);
7757 let e = _mm512_set_pd(1., OR_64, 3., OR_64, 5., OR_64, 7., OR_64);
7758 assert_eq_m512d(r, e);
7759 }
7760
7761 #[simd_test(enable = "avx512dq")]
7762 const fn test_mm512_maskz_or_pd() {
7763 let a = _mm512_set1_pd(OPRND1_64);
7764 let b = _mm512_set1_pd(OPRND2_64);
7765 let r = _mm512_maskz_or_pd(0b01010101, a, b);
7766 let e = _mm512_set_pd(0.0, OR_64, 0.0, OR_64, 0.0, OR_64, 0.0, OR_64);
7767 assert_eq_m512d(r, e);
7768 }
7769
7770 #[simd_test(enable = "avx512dq,avx512vl")]
7771 const fn test_mm_mask_or_ps() {
7772 let a = _mm_set1_ps(OPRND1_32);
7773 let b = _mm_set1_ps(OPRND2_32);
7774 let src = _mm_set_ps(1., 2., 3., 4.);
7775 let r = _mm_mask_or_ps(src, 0b0101, a, b);
7776 let e = _mm_set_ps(1., OR_32, 3., OR_32);
7777 assert_eq_m128(r, e);
7778 }
7779
7780 #[simd_test(enable = "avx512dq,avx512vl")]
7781 const fn test_mm_maskz_or_ps() {
7782 let a = _mm_set1_ps(OPRND1_32);
7783 let b = _mm_set1_ps(OPRND2_32);
7784 let r = _mm_maskz_or_ps(0b0101, a, b);
7785 let e = _mm_set_ps(0.0, OR_32, 0.0, OR_32);
7786 assert_eq_m128(r, e);
7787 }
7788
7789 #[simd_test(enable = "avx512dq,avx512vl")]
7790 const fn test_mm256_mask_or_ps() {
7791 let a = _mm256_set1_ps(OPRND1_32);
7792 let b = _mm256_set1_ps(OPRND2_32);
7793 let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7794 let r = _mm256_mask_or_ps(src, 0b01010101, a, b);
7795 let e = _mm256_set_ps(1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32);
7796 assert_eq_m256(r, e);
7797 }
7798
7799 #[simd_test(enable = "avx512dq,avx512vl")]
7800 const fn test_mm256_maskz_or_ps() {
7801 let a = _mm256_set1_ps(OPRND1_32);
7802 let b = _mm256_set1_ps(OPRND2_32);
7803 let r = _mm256_maskz_or_ps(0b01010101, a, b);
7804 let e = _mm256_set_ps(0.0, OR_32, 0.0, OR_32, 0.0, OR_32, 0.0, OR_32);
7805 assert_eq_m256(r, e);
7806 }
7807
7808 #[simd_test(enable = "avx512dq")]
7809 const fn test_mm512_or_ps() {
7810 let a = _mm512_set1_ps(OPRND1_32);
7811 let b = _mm512_set1_ps(OPRND2_32);
7812 let r = _mm512_or_ps(a, b);
7813 let e = _mm512_set1_ps(OR_32);
7814 assert_eq_m512(r, e);
7815 }
7816
7817 #[simd_test(enable = "avx512dq")]
7818 const fn test_mm512_mask_or_ps() {
7819 let a = _mm512_set1_ps(OPRND1_32);
7820 let b = _mm512_set1_ps(OPRND2_32);
7821 let src = _mm512_set_ps(
7822 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7823 );
7824 let r = _mm512_mask_or_ps(src, 0b0101010101010101, a, b);
7825 let e = _mm512_set_ps(
7826 1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32, 9., OR_32, 11., OR_32, 13., OR_32, 15.,
7827 OR_32,
7828 );
7829 assert_eq_m512(r, e);
7830 }
7831
7832 #[simd_test(enable = "avx512dq")]
7833 const fn test_mm512_maskz_or_ps() {
7834 let a = _mm512_set1_ps(OPRND1_32);
7835 let b = _mm512_set1_ps(OPRND2_32);
7836 let r = _mm512_maskz_or_ps(0b0101010101010101, a, b);
7837 let e = _mm512_set_ps(
7838 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32,
7839 );
7840 assert_eq_m512(r, e);
7841 }
7842
7843 #[simd_test(enable = "avx512dq,avx512vl")]
7844 const fn test_mm_mask_xor_pd() {
7845 let a = _mm_set1_pd(OPRND1_64);
7846 let b = _mm_set1_pd(OPRND2_64);
7847 let src = _mm_set_pd(1., 2.);
7848 let r = _mm_mask_xor_pd(src, 0b01, a, b);
7849 let e = _mm_set_pd(1., XOR_64);
7850 assert_eq_m128d(r, e);
7851 }
7852
7853 #[simd_test(enable = "avx512dq,avx512vl")]
7854 const fn test_mm_maskz_xor_pd() {
7855 let a = _mm_set1_pd(OPRND1_64);
7856 let b = _mm_set1_pd(OPRND2_64);
7857 let r = _mm_maskz_xor_pd(0b01, a, b);
7858 let e = _mm_set_pd(0.0, XOR_64);
7859 assert_eq_m128d(r, e);
7860 }
7861
7862 #[simd_test(enable = "avx512dq,avx512vl")]
7863 const fn test_mm256_mask_xor_pd() {
7864 let a = _mm256_set1_pd(OPRND1_64);
7865 let b = _mm256_set1_pd(OPRND2_64);
7866 let src = _mm256_set_pd(1., 2., 3., 4.);
7867 let r = _mm256_mask_xor_pd(src, 0b0101, a, b);
7868 let e = _mm256_set_pd(1., XOR_64, 3., XOR_64);
7869 assert_eq_m256d(r, e);
7870 }
7871
7872 #[simd_test(enable = "avx512dq,avx512vl")]
7873 const fn test_mm256_maskz_xor_pd() {
7874 let a = _mm256_set1_pd(OPRND1_64);
7875 let b = _mm256_set1_pd(OPRND2_64);
7876 let r = _mm256_maskz_xor_pd(0b0101, a, b);
7877 let e = _mm256_set_pd(0.0, XOR_64, 0.0, XOR_64);
7878 assert_eq_m256d(r, e);
7879 }
7880
7881 #[simd_test(enable = "avx512dq")]
7882 const fn test_mm512_xor_pd() {
7883 let a = _mm512_set1_pd(OPRND1_64);
7884 let b = _mm512_set1_pd(OPRND2_64);
7885 let r = _mm512_xor_pd(a, b);
7886 let e = _mm512_set1_pd(XOR_64);
7887 assert_eq_m512d(r, e);
7888 }
7889
7890 #[simd_test(enable = "avx512dq")]
7891 const fn test_mm512_mask_xor_pd() {
7892 let a = _mm512_set1_pd(OPRND1_64);
7893 let b = _mm512_set1_pd(OPRND2_64);
7894 let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7895 let r = _mm512_mask_xor_pd(src, 0b01010101, a, b);
7896 let e = _mm512_set_pd(1., XOR_64, 3., XOR_64, 5., XOR_64, 7., XOR_64);
7897 assert_eq_m512d(r, e);
7898 }
7899
7900 #[simd_test(enable = "avx512dq")]
7901 const fn test_mm512_maskz_xor_pd() {
7902 let a = _mm512_set1_pd(OPRND1_64);
7903 let b = _mm512_set1_pd(OPRND2_64);
7904 let r = _mm512_maskz_xor_pd(0b01010101, a, b);
7905 let e = _mm512_set_pd(0.0, XOR_64, 0.0, XOR_64, 0.0, XOR_64, 0.0, XOR_64);
7906 assert_eq_m512d(r, e);
7907 }
7908
7909 #[simd_test(enable = "avx512dq,avx512vl")]
7910 const fn test_mm_mask_xor_ps() {
7911 let a = _mm_set1_ps(OPRND1_32);
7912 let b = _mm_set1_ps(OPRND2_32);
7913 let src = _mm_set_ps(1., 2., 3., 4.);
7914 let r = _mm_mask_xor_ps(src, 0b0101, a, b);
7915 let e = _mm_set_ps(1., XOR_32, 3., XOR_32);
7916 assert_eq_m128(r, e);
7917 }
7918
7919 #[simd_test(enable = "avx512dq,avx512vl")]
7920 const fn test_mm_maskz_xor_ps() {
7921 let a = _mm_set1_ps(OPRND1_32);
7922 let b = _mm_set1_ps(OPRND2_32);
7923 let r = _mm_maskz_xor_ps(0b0101, a, b);
7924 let e = _mm_set_ps(0.0, XOR_32, 0.0, XOR_32);
7925 assert_eq_m128(r, e);
7926 }
7927
7928 #[simd_test(enable = "avx512dq,avx512vl")]
7929 const fn test_mm256_mask_xor_ps() {
7930 let a = _mm256_set1_ps(OPRND1_32);
7931 let b = _mm256_set1_ps(OPRND2_32);
7932 let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7933 let r = _mm256_mask_xor_ps(src, 0b01010101, a, b);
7934 let e = _mm256_set_ps(1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32);
7935 assert_eq_m256(r, e);
7936 }
7937
7938 #[simd_test(enable = "avx512dq,avx512vl")]
7939 const fn test_mm256_maskz_xor_ps() {
7940 let a = _mm256_set1_ps(OPRND1_32);
7941 let b = _mm256_set1_ps(OPRND2_32);
7942 let r = _mm256_maskz_xor_ps(0b01010101, a, b);
7943 let e = _mm256_set_ps(0.0, XOR_32, 0.0, XOR_32, 0.0, XOR_32, 0.0, XOR_32);
7944 assert_eq_m256(r, e);
7945 }
7946
7947 #[simd_test(enable = "avx512dq")]
7948 const fn test_mm512_xor_ps() {
7949 let a = _mm512_set1_ps(OPRND1_32);
7950 let b = _mm512_set1_ps(OPRND2_32);
7951 let r = _mm512_xor_ps(a, b);
7952 let e = _mm512_set1_ps(XOR_32);
7953 assert_eq_m512(r, e);
7954 }
7955
7956 #[simd_test(enable = "avx512dq")]
7957 const fn test_mm512_mask_xor_ps() {
7958 let a = _mm512_set1_ps(OPRND1_32);
7959 let b = _mm512_set1_ps(OPRND2_32);
7960 let src = _mm512_set_ps(
7961 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7962 );
7963 let r = _mm512_mask_xor_ps(src, 0b0101010101010101, a, b);
7964 let e = _mm512_set_ps(
7965 1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32, 9., XOR_32, 11., XOR_32, 13., XOR_32,
7966 15., XOR_32,
7967 );
7968 assert_eq_m512(r, e);
7969 }
7970
7971 #[simd_test(enable = "avx512dq")]
7972 const fn test_mm512_maskz_xor_ps() {
7973 let a = _mm512_set1_ps(OPRND1_32);
7974 let b = _mm512_set1_ps(OPRND2_32);
7975 let r = _mm512_maskz_xor_ps(0b0101010101010101, a, b);
7976 let e = _mm512_set_ps(
7977 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0.,
7978 XOR_32,
7979 );
7980 assert_eq_m512(r, e);
7981 }
7982
7983 #[simd_test(enable = "avx512dq,avx512vl")]
7984 const fn test_mm256_broadcast_f32x2() {
7985 let a = _mm_set_ps(1., 2., 3., 4.);
7986 let r = _mm256_broadcast_f32x2(a);
7987 let e = _mm256_set_ps(3., 4., 3., 4., 3., 4., 3., 4.);
7988 assert_eq_m256(r, e);
7989 }
7990
7991 #[simd_test(enable = "avx512dq,avx512vl")]
7992 const fn test_mm256_mask_broadcast_f32x2() {
7993 let a = _mm_set_ps(1., 2., 3., 4.);
7994 let b = _mm256_set_ps(5., 6., 7., 8., 9., 10., 11., 12.);
7995 let r = _mm256_mask_broadcast_f32x2(b, 0b01101001, a);
7996 let e = _mm256_set_ps(5., 4., 3., 8., 3., 10., 11., 4.);
7997 assert_eq_m256(r, e);
7998 }
7999
8000 #[simd_test(enable = "avx512dq,avx512vl")]
8001 const fn test_mm256_maskz_broadcast_f32x2() {
8002 let a = _mm_set_ps(1., 2., 3., 4.);
8003 let r = _mm256_maskz_broadcast_f32x2(0b01101001, a);
8004 let e = _mm256_set_ps(0., 4., 3., 0., 3., 0., 0., 4.);
8005 assert_eq_m256(r, e);
8006 }
8007
8008 #[simd_test(enable = "avx512dq")]
8009 const fn test_mm512_broadcast_f32x2() {
8010 let a = _mm_set_ps(1., 2., 3., 4.);
8011 let r = _mm512_broadcast_f32x2(a);
8012 let e = _mm512_set_ps(
8013 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4.,
8014 );
8015 assert_eq_m512(r, e);
8016 }
8017
8018 #[simd_test(enable = "avx512dq")]
8019 const fn test_mm512_mask_broadcast_f32x2() {
8020 let a = _mm_set_ps(1., 2., 3., 4.);
8021 let b = _mm512_set_ps(
8022 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.,
8023 );
8024 let r = _mm512_mask_broadcast_f32x2(b, 0b0110100100111100, a);
8025 let e = _mm512_set_ps(
8026 5., 4., 3., 8., 3., 10., 11., 4., 13., 14., 3., 4., 3., 4., 19., 20.,
8027 );
8028 assert_eq_m512(r, e);
8029 }
8030
8031 #[simd_test(enable = "avx512dq")]
8032 const fn test_mm512_maskz_broadcast_f32x2() {
8033 let a = _mm_set_ps(1., 2., 3., 4.);
8034 let r = _mm512_maskz_broadcast_f32x2(0b0110100100111100, a);
8035 let e = _mm512_set_ps(
8036 0., 4., 3., 0., 3., 0., 0., 4., 0., 0., 3., 4., 3., 4., 0., 0.,
8037 );
8038 assert_eq_m512(r, e);
8039 }
8040
8041 #[simd_test(enable = "avx512dq")]
8042 const fn test_mm512_broadcast_f32x8() {
8043 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8044 let r = _mm512_broadcast_f32x8(a);
8045 let e = _mm512_set_ps(
8046 1., 2., 3., 4., 5., 6., 7., 8., 1., 2., 3., 4., 5., 6., 7., 8.,
8047 );
8048 assert_eq_m512(r, e);
8049 }
8050
8051 #[simd_test(enable = "avx512dq")]
8052 const fn test_mm512_mask_broadcast_f32x8() {
8053 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8054 let b = _mm512_set_ps(
8055 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.,
8056 );
8057 let r = _mm512_mask_broadcast_f32x8(b, 0b0110100100111100, a);
8058 let e = _mm512_set_ps(
8059 9., 2., 3., 12., 5., 14., 15., 8., 17., 18., 3., 4., 5., 6., 23., 24.,
8060 );
8061 assert_eq_m512(r, e);
8062 }
8063
8064 #[simd_test(enable = "avx512dq")]
8065 const fn test_mm512_maskz_broadcast_f32x8() {
8066 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8067 let r = _mm512_maskz_broadcast_f32x8(0b0110100100111100, a);
8068 let e = _mm512_set_ps(
8069 0., 2., 3., 0., 5., 0., 0., 8., 0., 0., 3., 4., 5., 6., 0., 0.,
8070 );
8071 assert_eq_m512(r, e);
8072 }
8073
8074 #[simd_test(enable = "avx512dq,avx512vl")]
8075 const fn test_mm256_broadcast_f64x2() {
8076 let a = _mm_set_pd(1., 2.);
8077 let r = _mm256_broadcast_f64x2(a);
8078 let e = _mm256_set_pd(1., 2., 1., 2.);
8079 assert_eq_m256d(r, e);
8080 }
8081
8082 #[simd_test(enable = "avx512dq,avx512vl")]
8083 const fn test_mm256_mask_broadcast_f64x2() {
8084 let a = _mm_set_pd(1., 2.);
8085 let b = _mm256_set_pd(3., 4., 5., 6.);
8086 let r = _mm256_mask_broadcast_f64x2(b, 0b0110, a);
8087 let e = _mm256_set_pd(3., 2., 1., 6.);
8088 assert_eq_m256d(r, e);
8089 }
8090
8091 #[simd_test(enable = "avx512dq,avx512vl")]
8092 const fn test_mm256_maskz_broadcast_f64x2() {
8093 let a = _mm_set_pd(1., 2.);
8094 let r = _mm256_maskz_broadcast_f64x2(0b0110, a);
8095 let e = _mm256_set_pd(0., 2., 1., 0.);
8096 assert_eq_m256d(r, e);
8097 }
8098
8099 #[simd_test(enable = "avx512dq")]
8100 const fn test_mm512_broadcast_f64x2() {
8101 let a = _mm_set_pd(1., 2.);
8102 let r = _mm512_broadcast_f64x2(a);
8103 let e = _mm512_set_pd(1., 2., 1., 2., 1., 2., 1., 2.);
8104 assert_eq_m512d(r, e);
8105 }
8106
8107 #[simd_test(enable = "avx512dq")]
8108 const fn test_mm512_mask_broadcast_f64x2() {
8109 let a = _mm_set_pd(1., 2.);
8110 let b = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
8111 let r = _mm512_mask_broadcast_f64x2(b, 0b01101001, a);
8112 let e = _mm512_set_pd(3., 2., 1., 6., 1., 8., 9., 2.);
8113 assert_eq_m512d(r, e);
8114 }
8115
8116 #[simd_test(enable = "avx512dq")]
8117 const fn test_mm512_maskz_broadcast_f64x2() {
8118 let a = _mm_set_pd(1., 2.);
8119 let r = _mm512_maskz_broadcast_f64x2(0b01101001, a);
8120 let e = _mm512_set_pd(0., 2., 1., 0., 1., 0., 0., 2.);
8121 assert_eq_m512d(r, e);
8122 }
8123
8124 #[simd_test(enable = "avx512dq,avx512vl")]
8125 const fn test_mm_broadcast_i32x2() {
8126 let a = _mm_set_epi32(1, 2, 3, 4);
8127 let r = _mm_broadcast_i32x2(a);
8128 let e = _mm_set_epi32(3, 4, 3, 4);
8129 assert_eq_m128i(r, e);
8130 }
8131
8132 #[simd_test(enable = "avx512dq,avx512vl")]
8133 const fn test_mm_mask_broadcast_i32x2() {
8134 let a = _mm_set_epi32(1, 2, 3, 4);
8135 let b = _mm_set_epi32(5, 6, 7, 8);
8136 let r = _mm_mask_broadcast_i32x2(b, 0b0110, a);
8137 let e = _mm_set_epi32(5, 4, 3, 8);
8138 assert_eq_m128i(r, e);
8139 }
8140
8141 #[simd_test(enable = "avx512dq,avx512vl")]
8142 const fn test_mm_maskz_broadcast_i32x2() {
8143 let a = _mm_set_epi32(1, 2, 3, 4);
8144 let r = _mm_maskz_broadcast_i32x2(0b0110, a);
8145 let e = _mm_set_epi32(0, 4, 3, 0);
8146 assert_eq_m128i(r, e);
8147 }
8148
8149 #[simd_test(enable = "avx512dq,avx512vl")]
8150 const fn test_mm256_broadcast_i32x2() {
8151 let a = _mm_set_epi32(1, 2, 3, 4);
8152 let r = _mm256_broadcast_i32x2(a);
8153 let e = _mm256_set_epi32(3, 4, 3, 4, 3, 4, 3, 4);
8154 assert_eq_m256i(r, e);
8155 }
8156
8157 #[simd_test(enable = "avx512dq,avx512vl")]
8158 const fn test_mm256_mask_broadcast_i32x2() {
8159 let a = _mm_set_epi32(1, 2, 3, 4);
8160 let b = _mm256_set_epi32(5, 6, 7, 8, 9, 10, 11, 12);
8161 let r = _mm256_mask_broadcast_i32x2(b, 0b01101001, a);
8162 let e = _mm256_set_epi32(5, 4, 3, 8, 3, 10, 11, 4);
8163 assert_eq_m256i(r, e);
8164 }
8165
8166 #[simd_test(enable = "avx512dq,avx512vl")]
8167 const fn test_mm256_maskz_broadcast_i32x2() {
8168 let a = _mm_set_epi32(1, 2, 3, 4);
8169 let r = _mm256_maskz_broadcast_i32x2(0b01101001, a);
8170 let e = _mm256_set_epi32(0, 4, 3, 0, 3, 0, 0, 4);
8171 assert_eq_m256i(r, e);
8172 }
8173
8174 #[simd_test(enable = "avx512dq")]
8175 const fn test_mm512_broadcast_i32x2() {
8176 let a = _mm_set_epi32(1, 2, 3, 4);
8177 let r = _mm512_broadcast_i32x2(a);
8178 let e = _mm512_set_epi32(3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4);
8179 assert_eq_m512i(r, e);
8180 }
8181
8182 #[simd_test(enable = "avx512dq")]
8183 const fn test_mm512_mask_broadcast_i32x2() {
8184 let a = _mm_set_epi32(1, 2, 3, 4);
8185 let b = _mm512_set_epi32(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);
8186 let r = _mm512_mask_broadcast_i32x2(b, 0b0110100100111100, a);
8187 let e = _mm512_set_epi32(5, 4, 3, 8, 3, 10, 11, 4, 13, 14, 3, 4, 3, 4, 19, 20);
8188 assert_eq_m512i(r, e);
8189 }
8190
8191 #[simd_test(enable = "avx512dq")]
8192 const fn test_mm512_maskz_broadcast_i32x2() {
8193 let a = _mm_set_epi32(1, 2, 3, 4);
8194 let r = _mm512_maskz_broadcast_i32x2(0b0110100100111100, a);
8195 let e = _mm512_set_epi32(0, 4, 3, 0, 3, 0, 0, 4, 0, 0, 3, 4, 3, 4, 0, 0);
8196 assert_eq_m512i(r, e);
8197 }
8198
8199 #[simd_test(enable = "avx512dq")]
8200 const fn test_mm512_broadcast_i32x8() {
8201 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
8202 let r = _mm512_broadcast_i32x8(a);
8203 let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
8204 assert_eq_m512i(r, e);
8205 }
8206
8207 #[simd_test(enable = "avx512dq")]
8208 const fn test_mm512_mask_broadcast_i32x8() {
8209 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
8210 let b = _mm512_set_epi32(
8211 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
8212 );
8213 let r = _mm512_mask_broadcast_i32x8(b, 0b0110100100111100, a);
8214 let e = _mm512_set_epi32(9, 2, 3, 12, 5, 14, 15, 8, 17, 18, 3, 4, 5, 6, 23, 24);
8215 assert_eq_m512i(r, e);
8216 }
8217
8218 #[simd_test(enable = "avx512dq")]
8219 const fn test_mm512_maskz_broadcast_i32x8() {
8220 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
8221 let r = _mm512_maskz_broadcast_i32x8(0b0110100100111100, a);
8222 let e = _mm512_set_epi32(0, 2, 3, 0, 5, 0, 0, 8, 0, 0, 3, 4, 5, 6, 0, 0);
8223 assert_eq_m512i(r, e);
8224 }
8225
8226 #[simd_test(enable = "avx512dq,avx512vl")]
8227 const fn test_mm256_broadcast_i64x2() {
8228 let a = _mm_set_epi64x(1, 2);
8229 let r = _mm256_broadcast_i64x2(a);
8230 let e = _mm256_set_epi64x(1, 2, 1, 2);
8231 assert_eq_m256i(r, e);
8232 }
8233
8234 #[simd_test(enable = "avx512dq,avx512vl")]
8235 const fn test_mm256_mask_broadcast_i64x2() {
8236 let a = _mm_set_epi64x(1, 2);
8237 let b = _mm256_set_epi64x(3, 4, 5, 6);
8238 let r = _mm256_mask_broadcast_i64x2(b, 0b0110, a);
8239 let e = _mm256_set_epi64x(3, 2, 1, 6);
8240 assert_eq_m256i(r, e);
8241 }
8242
8243 #[simd_test(enable = "avx512dq,avx512vl")]
8244 const fn test_mm256_maskz_broadcast_i64x2() {
8245 let a = _mm_set_epi64x(1, 2);
8246 let r = _mm256_maskz_broadcast_i64x2(0b0110, a);
8247 let e = _mm256_set_epi64x(0, 2, 1, 0);
8248 assert_eq_m256i(r, e);
8249 }
8250
8251 #[simd_test(enable = "avx512dq")]
8252 const fn test_mm512_broadcast_i64x2() {
8253 let a = _mm_set_epi64x(1, 2);
8254 let r = _mm512_broadcast_i64x2(a);
8255 let e = _mm512_set_epi64(1, 2, 1, 2, 1, 2, 1, 2);
8256 assert_eq_m512i(r, e);
8257 }
8258
8259 #[simd_test(enable = "avx512dq")]
8260 const fn test_mm512_mask_broadcast_i64x2() {
8261 let a = _mm_set_epi64x(1, 2);
8262 let b = _mm512_set_epi64(3, 4, 5, 6, 7, 8, 9, 10);
8263 let r = _mm512_mask_broadcast_i64x2(b, 0b01101001, a);
8264 let e = _mm512_set_epi64(3, 2, 1, 6, 1, 8, 9, 2);
8265 assert_eq_m512i(r, e);
8266 }
8267
8268 #[simd_test(enable = "avx512dq")]
8269 const fn test_mm512_maskz_broadcast_i64x2() {
8270 let a = _mm_set_epi64x(1, 2);
8271 let r = _mm512_maskz_broadcast_i64x2(0b01101001, a);
8272 let e = _mm512_set_epi64(0, 2, 1, 0, 1, 0, 0, 2);
8273 assert_eq_m512i(r, e);
8274 }
8275
8276 #[simd_test(enable = "avx512dq")]
8277 const fn test_mm512_extractf32x8_ps() {
8278 let a = _mm512_set_ps(
8279 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
8280 );
8281 let r = _mm512_extractf32x8_ps::<1>(a);
8282 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8283 assert_eq_m256(r, e);
8284 }
8285
8286 #[simd_test(enable = "avx512dq")]
8287 const fn test_mm512_mask_extractf32x8_ps() {
8288 let a = _mm512_set_ps(
8289 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
8290 );
8291 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
8292 let r = _mm512_mask_extractf32x8_ps::<1>(b, 0b01101001, a);
8293 let e = _mm256_set_ps(17., 2., 3., 20., 5., 22., 23., 8.);
8294 assert_eq_m256(r, e);
8295 }
8296
8297 #[simd_test(enable = "avx512dq")]
8298 const fn test_mm512_maskz_extractf32x8_ps() {
8299 let a = _mm512_set_ps(
8300 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
8301 );
8302 let r = _mm512_maskz_extractf32x8_ps::<1>(0b01101001, a);
8303 let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
8304 assert_eq_m256(r, e);
8305 }
8306
8307 #[simd_test(enable = "avx512dq,avx512vl")]
8308 const fn test_mm256_extractf64x2_pd() {
8309 let a = _mm256_set_pd(1., 2., 3., 4.);
8310 let r = _mm256_extractf64x2_pd::<1>(a);
8311 let e = _mm_set_pd(1., 2.);
8312 assert_eq_m128d(r, e);
8313 }
8314
8315 #[simd_test(enable = "avx512dq,avx512vl")]
8316 const fn test_mm256_mask_extractf64x2_pd() {
8317 let a = _mm256_set_pd(1., 2., 3., 4.);
8318 let b = _mm_set_pd(5., 6.);
8319 let r = _mm256_mask_extractf64x2_pd::<1>(b, 0b01, a);
8320 let e = _mm_set_pd(5., 2.);
8321 assert_eq_m128d(r, e);
8322 }
8323
8324 #[simd_test(enable = "avx512dq,avx512vl")]
8325 const fn test_mm256_maskz_extractf64x2_pd() {
8326 let a = _mm256_set_pd(1., 2., 3., 4.);
8327 let r = _mm256_maskz_extractf64x2_pd::<1>(0b01, a);
8328 let e = _mm_set_pd(0., 2.);
8329 assert_eq_m128d(r, e);
8330 }
8331
8332 #[simd_test(enable = "avx512dq")]
8333 const fn test_mm512_extractf64x2_pd() {
8334 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8335 let r = _mm512_extractf64x2_pd::<2>(a);
8336 let e = _mm_set_pd(3., 4.);
8337 assert_eq_m128d(r, e);
8338 }
8339
8340 #[simd_test(enable = "avx512dq")]
8341 const fn test_mm512_mask_extractf64x2_pd() {
8342 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8343 let b = _mm_set_pd(9., 10.);
8344 let r = _mm512_mask_extractf64x2_pd::<2>(b, 0b01, a);
8345 let e = _mm_set_pd(9., 4.);
8346 assert_eq_m128d(r, e);
8347 }
8348
8349 #[simd_test(enable = "avx512dq")]
8350 const fn test_mm512_maskz_extractf64x2_pd() {
8351 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8352 let r = _mm512_maskz_extractf64x2_pd::<2>(0b01, a);
8353 let e = _mm_set_pd(0., 4.);
8354 assert_eq_m128d(r, e);
8355 }
8356
8357 #[simd_test(enable = "avx512dq")]
8358 const fn test_mm512_extracti32x8_epi32() {
8359 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8360 let r = _mm512_extracti32x8_epi32::<1>(a);
8361 let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
8362 assert_eq_m256i(r, e);
8363 }
8364
8365 #[simd_test(enable = "avx512dq")]
8366 const fn test_mm512_mask_extracti32x8_epi32() {
8367 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8368 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
8369 let r = _mm512_mask_extracti32x8_epi32::<1>(b, 0b01101001, a);
8370 let e = _mm256_set_epi32(17, 2, 3, 20, 5, 22, 23, 8);
8371 assert_eq_m256i(r, e);
8372 }
8373
8374 #[simd_test(enable = "avx512dq")]
8375 const fn test_mm512_maskz_extracti32x8_epi32() {
8376 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8377 let r = _mm512_maskz_extracti32x8_epi32::<1>(0b01101001, a);
8378 let e = _mm256_set_epi32(0, 2, 3, 0, 5, 0, 0, 8);
8379 assert_eq_m256i(r, e);
8380 }
8381
8382 #[simd_test(enable = "avx512dq,avx512vl")]
8383 const fn test_mm256_extracti64x2_epi64() {
8384 let a = _mm256_set_epi64x(1, 2, 3, 4);
8385 let r = _mm256_extracti64x2_epi64::<1>(a);
8386 let e = _mm_set_epi64x(1, 2);
8387 assert_eq_m128i(r, e);
8388 }
8389
8390 #[simd_test(enable = "avx512dq,avx512vl")]
8391 const fn test_mm256_mask_extracti64x2_epi64() {
8392 let a = _mm256_set_epi64x(1, 2, 3, 4);
8393 let b = _mm_set_epi64x(5, 6);
8394 let r = _mm256_mask_extracti64x2_epi64::<1>(b, 0b01, a);
8395 let e = _mm_set_epi64x(5, 2);
8396 assert_eq_m128i(r, e);
8397 }
8398
8399 #[simd_test(enable = "avx512dq,avx512vl")]
8400 const fn test_mm256_maskz_extracti64x2_epi64() {
8401 let a = _mm256_set_epi64x(1, 2, 3, 4);
8402 let r = _mm256_maskz_extracti64x2_epi64::<1>(0b01, a);
8403 let e = _mm_set_epi64x(0, 2);
8404 assert_eq_m128i(r, e);
8405 }
8406
8407 #[simd_test(enable = "avx512dq")]
8408 const fn test_mm512_extracti64x2_epi64() {
8409 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8410 let r = _mm512_extracti64x2_epi64::<2>(a);
8411 let e = _mm_set_epi64x(3, 4);
8412 assert_eq_m128i(r, e);
8413 }
8414
8415 #[simd_test(enable = "avx512dq")]
8416 const fn test_mm512_mask_extracti64x2_epi64() {
8417 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8418 let b = _mm_set_epi64x(9, 10);
8419 let r = _mm512_mask_extracti64x2_epi64::<2>(b, 0b01, a);
8420 let e = _mm_set_epi64x(9, 4);
8421 assert_eq_m128i(r, e);
8422 }
8423
8424 #[simd_test(enable = "avx512dq")]
8425 const fn test_mm512_maskz_extracti64x2_epi64() {
8426 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8427 let r = _mm512_maskz_extracti64x2_epi64::<2>(0b01, a);
8428 let e = _mm_set_epi64x(0, 4);
8429 assert_eq_m128i(r, e);
8430 }
8431
8432 #[simd_test(enable = "avx512dq")]
8433 const fn test_mm512_insertf32x8() {
8434 let a = _mm512_set_ps(
8435 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
8436 );
8437 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
8438 let r = _mm512_insertf32x8::<1>(a, b);
8439 let e = _mm512_set_ps(
8440 17., 18., 19., 20., 21., 22., 23., 24., 9., 10., 11., 12., 13., 14., 15., 16.,
8441 );
8442 assert_eq_m512(r, e);
8443 }
8444
8445 #[simd_test(enable = "avx512dq")]
8446 const fn test_mm512_mask_insertf32x8() {
8447 let a = _mm512_set_ps(
8448 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
8449 );
8450 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
8451 let src = _mm512_set_ps(
8452 25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40.,
8453 );
8454 let r = _mm512_mask_insertf32x8::<1>(src, 0b0110100100111100, a, b);
8455 let e = _mm512_set_ps(
8456 25., 18., 19., 28., 21., 30., 31., 24., 33., 34., 11., 12., 13., 14., 39., 40.,
8457 );
8458 assert_eq_m512(r, e);
8459 }
8460
8461 #[simd_test(enable = "avx512dq")]
8462 const fn test_mm512_maskz_insertf32x8() {
8463 let a = _mm512_set_ps(
8464 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
8465 );
8466 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
8467 let r = _mm512_maskz_insertf32x8::<1>(0b0110100100111100, a, b);
8468 let e = _mm512_set_ps(
8469 0., 18., 19., 0., 21., 0., 0., 24., 0., 0., 11., 12., 13., 14., 0., 0.,
8470 );
8471 assert_eq_m512(r, e);
8472 }
8473
8474 #[simd_test(enable = "avx512dq,avx512vl")]
8475 const fn test_mm256_insertf64x2() {
8476 let a = _mm256_set_pd(1., 2., 3., 4.);
8477 let b = _mm_set_pd(5., 6.);
8478 let r = _mm256_insertf64x2::<1>(a, b);
8479 let e = _mm256_set_pd(5., 6., 3., 4.);
8480 assert_eq_m256d(r, e);
8481 }
8482
8483 #[simd_test(enable = "avx512dq,avx512vl")]
8484 const fn test_mm256_mask_insertf64x2() {
8485 let a = _mm256_set_pd(1., 2., 3., 4.);
8486 let b = _mm_set_pd(5., 6.);
8487 let src = _mm256_set_pd(7., 8., 9., 10.);
8488 let r = _mm256_mask_insertf64x2::<1>(src, 0b0110, a, b);
8489 let e = _mm256_set_pd(7., 6., 3., 10.);
8490 assert_eq_m256d(r, e);
8491 }
8492
8493 #[simd_test(enable = "avx512dq,avx512vl")]
8494 const fn test_mm256_maskz_insertf64x2() {
8495 let a = _mm256_set_pd(1., 2., 3., 4.);
8496 let b = _mm_set_pd(5., 6.);
8497 let r = _mm256_maskz_insertf64x2::<1>(0b0110, a, b);
8498 let e = _mm256_set_pd(0., 6., 3., 0.);
8499 assert_eq_m256d(r, e);
8500 }
8501
8502 #[simd_test(enable = "avx512dq")]
8503 const fn test_mm512_insertf64x2() {
8504 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8505 let b = _mm_set_pd(9., 10.);
8506 let r = _mm512_insertf64x2::<2>(a, b);
8507 let e = _mm512_set_pd(1., 2., 9., 10., 5., 6., 7., 8.);
8508 assert_eq_m512d(r, e);
8509 }
8510
8511 #[simd_test(enable = "avx512dq")]
8512 const fn test_mm512_mask_insertf64x2() {
8513 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8514 let b = _mm_set_pd(9., 10.);
8515 let src = _mm512_set_pd(11., 12., 13., 14., 15., 16., 17., 18.);
8516 let r = _mm512_mask_insertf64x2::<2>(src, 0b01101001, a, b);
8517 let e = _mm512_set_pd(11., 2., 9., 14., 5., 16., 17., 8.);
8518 assert_eq_m512d(r, e);
8519 }
8520
8521 #[simd_test(enable = "avx512dq")]
8522 const fn test_mm512_maskz_insertf64x2() {
8523 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8524 let b = _mm_set_pd(9., 10.);
8525 let r = _mm512_maskz_insertf64x2::<2>(0b01101001, a, b);
8526 let e = _mm512_set_pd(0., 2., 9., 0., 5., 0., 0., 8.);
8527 assert_eq_m512d(r, e);
8528 }
8529
8530 #[simd_test(enable = "avx512dq")]
8531 const fn test_mm512_inserti32x8() {
8532 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8533 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
8534 let r = _mm512_inserti32x8::<1>(a, b);
8535 let e = _mm512_set_epi32(
8536 17, 18, 19, 20, 21, 22, 23, 24, 9, 10, 11, 12, 13, 14, 15, 16,
8537 );
8538 assert_eq_m512i(r, e);
8539 }
8540
8541 #[simd_test(enable = "avx512dq")]
8542 const fn test_mm512_mask_inserti32x8() {
8543 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8544 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
8545 let src = _mm512_set_epi32(
8546 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
8547 );
8548 let r = _mm512_mask_inserti32x8::<1>(src, 0b0110100100111100, a, b);
8549 let e = _mm512_set_epi32(
8550 25, 18, 19, 28, 21, 30, 31, 24, 33, 34, 11, 12, 13, 14, 39, 40,
8551 );
8552 assert_eq_m512i(r, e);
8553 }
8554
8555 #[simd_test(enable = "avx512dq")]
8556 const fn test_mm512_maskz_inserti32x8() {
8557 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8558 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
8559 let r = _mm512_maskz_inserti32x8::<1>(0b0110100100111100, a, b);
8560 let e = _mm512_set_epi32(0, 18, 19, 0, 21, 0, 0, 24, 0, 0, 11, 12, 13, 14, 0, 0);
8561 assert_eq_m512i(r, e);
8562 }
8563
8564 #[simd_test(enable = "avx512dq,avx512vl")]
8565 const fn test_mm256_inserti64x2() {
8566 let a = _mm256_set_epi64x(1, 2, 3, 4);
8567 let b = _mm_set_epi64x(5, 6);
8568 let r = _mm256_inserti64x2::<1>(a, b);
8569 let e = _mm256_set_epi64x(5, 6, 3, 4);
8570 assert_eq_m256i(r, e);
8571 }
8572
8573 #[simd_test(enable = "avx512dq,avx512vl")]
8574 const fn test_mm256_mask_inserti64x2() {
8575 let a = _mm256_set_epi64x(1, 2, 3, 4);
8576 let b = _mm_set_epi64x(5, 6);
8577 let src = _mm256_set_epi64x(7, 8, 9, 10);
8578 let r = _mm256_mask_inserti64x2::<1>(src, 0b0110, a, b);
8579 let e = _mm256_set_epi64x(7, 6, 3, 10);
8580 assert_eq_m256i(r, e);
8581 }
8582
8583 #[simd_test(enable = "avx512dq,avx512vl")]
8584 const fn test_mm256_maskz_inserti64x2() {
8585 let a = _mm256_set_epi64x(1, 2, 3, 4);
8586 let b = _mm_set_epi64x(5, 6);
8587 let r = _mm256_maskz_inserti64x2::<1>(0b0110, a, b);
8588 let e = _mm256_set_epi64x(0, 6, 3, 0);
8589 assert_eq_m256i(r, e);
8590 }
8591
8592 #[simd_test(enable = "avx512dq")]
8593 const fn test_mm512_inserti64x2() {
8594 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8595 let b = _mm_set_epi64x(9, 10);
8596 let r = _mm512_inserti64x2::<2>(a, b);
8597 let e = _mm512_set_epi64(1, 2, 9, 10, 5, 6, 7, 8);
8598 assert_eq_m512i(r, e);
8599 }
8600
8601 #[simd_test(enable = "avx512dq")]
8602 const fn test_mm512_mask_inserti64x2() {
8603 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8604 let b = _mm_set_epi64x(9, 10);
8605 let src = _mm512_set_epi64(11, 12, 13, 14, 15, 16, 17, 18);
8606 let r = _mm512_mask_inserti64x2::<2>(src, 0b01101001, a, b);
8607 let e = _mm512_set_epi64(11, 2, 9, 14, 5, 16, 17, 8);
8608 assert_eq_m512i(r, e);
8609 }
8610
8611 #[simd_test(enable = "avx512dq")]
8612 const fn test_mm512_maskz_inserti64x2() {
8613 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8614 let b = _mm_set_epi64x(9, 10);
8615 let r = _mm512_maskz_inserti64x2::<2>(0b01101001, a, b);
8616 let e = _mm512_set_epi64(0, 2, 9, 0, 5, 0, 0, 8);
8617 assert_eq_m512i(r, e);
8618 }
8619
8620 #[simd_test(enable = "avx512dq")]
8621 fn test_mm512_cvt_roundepi64_pd() {
8622 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8623 let r = _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
8624 let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8625 assert_eq_m512d(r, e);
8626 }
8627
8628 #[simd_test(enable = "avx512dq")]
8629 fn test_mm512_mask_cvt_roundepi64_pd() {
8630 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8631 let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
8632 let r = _mm512_mask_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8633 b, 0b01101001, a,
8634 );
8635 let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
8636 assert_eq_m512d(r, e);
8637 }
8638
8639 #[simd_test(enable = "avx512dq")]
8640 fn test_mm512_maskz_cvt_roundepi64_pd() {
8641 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8642 let r = _mm512_maskz_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8643 0b01101001, a,
8644 );
8645 let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
8646 assert_eq_m512d(r, e);
8647 }
8648
8649 #[simd_test(enable = "avx512dq,avx512vl")]
8650 fn test_mm_cvtepi64_pd() {
8651 let a = _mm_set_epi64x(1, 2);
8652 let r = _mm_cvtepi64_pd(a);
8653 let e = _mm_set_pd(1., 2.);
8654 assert_eq_m128d(r, e);
8655 }
8656
8657 #[simd_test(enable = "avx512dq,avx512vl")]
8658 fn test_mm_mask_cvtepi64_pd() {
8659 let a = _mm_set_epi64x(1, 2);
8660 let b = _mm_set_pd(3., 4.);
8661 let r = _mm_mask_cvtepi64_pd(b, 0b01, a);
8662 let e = _mm_set_pd(3., 2.);
8663 assert_eq_m128d(r, e);
8664 }
8665
8666 #[simd_test(enable = "avx512dq,avx512vl")]
8667 fn test_mm_maskz_cvtepi64_pd() {
8668 let a = _mm_set_epi64x(1, 2);
8669 let r = _mm_maskz_cvtepi64_pd(0b01, a);
8670 let e = _mm_set_pd(0., 2.);
8671 assert_eq_m128d(r, e);
8672 }
8673
8674 #[simd_test(enable = "avx512dq,avx512vl")]
8675 fn test_mm256_cvtepi64_pd() {
8676 let a = _mm256_set_epi64x(1, 2, 3, 4);
8677 let r = _mm256_cvtepi64_pd(a);
8678 let e = _mm256_set_pd(1., 2., 3., 4.);
8679 assert_eq_m256d(r, e);
8680 }
8681
8682 #[simd_test(enable = "avx512dq,avx512vl")]
8683 fn test_mm256_mask_cvtepi64_pd() {
8684 let a = _mm256_set_epi64x(1, 2, 3, 4);
8685 let b = _mm256_set_pd(5., 6., 7., 8.);
8686 let r = _mm256_mask_cvtepi64_pd(b, 0b0110, a);
8687 let e = _mm256_set_pd(5., 2., 3., 8.);
8688 assert_eq_m256d(r, e);
8689 }
8690
8691 #[simd_test(enable = "avx512dq,avx512vl")]
8692 fn test_mm256_maskz_cvtepi64_pd() {
8693 let a = _mm256_set_epi64x(1, 2, 3, 4);
8694 let r = _mm256_maskz_cvtepi64_pd(0b0110, a);
8695 let e = _mm256_set_pd(0., 2., 3., 0.);
8696 assert_eq_m256d(r, e);
8697 }
8698
8699 #[simd_test(enable = "avx512dq")]
8700 fn test_mm512_cvtepi64_pd() {
8701 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8702 let r = _mm512_cvtepi64_pd(a);
8703 let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8704 assert_eq_m512d(r, e);
8705 }
8706
8707 #[simd_test(enable = "avx512dq")]
8708 fn test_mm512_mask_cvtepi64_pd() {
8709 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8710 let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
8711 let r = _mm512_mask_cvtepi64_pd(b, 0b01101001, a);
8712 let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
8713 assert_eq_m512d(r, e);
8714 }
8715
8716 #[simd_test(enable = "avx512dq")]
8717 fn test_mm512_maskz_cvtepi64_pd() {
8718 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8719 let r = _mm512_maskz_cvtepi64_pd(0b01101001, a);
8720 let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
8721 assert_eq_m512d(r, e);
8722 }
8723
8724 #[simd_test(enable = "avx512dq")]
8725 fn test_mm512_cvt_roundepi64_ps() {
8726 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8727 let r = _mm512_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
8728 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8729 assert_eq_m256(r, e);
8730 }
8731
8732 #[simd_test(enable = "avx512dq")]
8733 fn test_mm512_mask_cvt_roundepi64_ps() {
8734 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8735 let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
8736 let r = _mm512_mask_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8737 b, 0b01101001, a,
8738 );
8739 let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
8740 assert_eq_m256(r, e);
8741 }
8742
8743 #[simd_test(enable = "avx512dq")]
8744 fn test_mm512_maskz_cvt_roundepi64_ps() {
8745 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8746 let r = _mm512_maskz_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8747 0b01101001, a,
8748 );
8749 let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
8750 assert_eq_m256(r, e);
8751 }
8752
8753 #[simd_test(enable = "avx512dq,avx512vl")]
8754 fn test_mm_cvtepi64_ps() {
8755 let a = _mm_set_epi64x(1, 2);
8756 let r = _mm_cvtepi64_ps(a);
8757 let e = _mm_set_ps(0., 0., 1., 2.);
8758 assert_eq_m128(r, e);
8759 }
8760
8761 #[simd_test(enable = "avx512dq,avx512vl")]
8762 fn test_mm_mask_cvtepi64_ps() {
8763 let a = _mm_set_epi64x(1, 2);
8764 let b = _mm_set_ps(3., 4., 5., 6.);
8765 let r = _mm_mask_cvtepi64_ps(b, 0b01, a);
8766 let e = _mm_set_ps(0., 0., 5., 2.);
8767 assert_eq_m128(r, e);
8768 }
8769
8770 #[simd_test(enable = "avx512dq,avx512vl")]
8771 fn test_mm_maskz_cvtepi64_ps() {
8772 let a = _mm_set_epi64x(1, 2);
8773 let r = _mm_maskz_cvtepi64_ps(0b01, a);
8774 let e = _mm_set_ps(0., 0., 0., 2.);
8775 assert_eq_m128(r, e);
8776 }
8777
8778 #[simd_test(enable = "avx512dq,avx512vl")]
8779 fn test_mm256_cvtepi64_ps() {
8780 let a = _mm256_set_epi64x(1, 2, 3, 4);
8781 let r = _mm256_cvtepi64_ps(a);
8782 let e = _mm_set_ps(1., 2., 3., 4.);
8783 assert_eq_m128(r, e);
8784 }
8785
8786 #[simd_test(enable = "avx512dq,avx512vl")]
8787 fn test_mm256_mask_cvtepi64_ps() {
8788 let a = _mm256_set_epi64x(1, 2, 3, 4);
8789 let b = _mm_set_ps(5., 6., 7., 8.);
8790 let r = _mm256_mask_cvtepi64_ps(b, 0b0110, a);
8791 let e = _mm_set_ps(5., 2., 3., 8.);
8792 assert_eq_m128(r, e);
8793 }
8794
8795 #[simd_test(enable = "avx512dq,avx512vl")]
8796 fn test_mm256_maskz_cvtepi64_ps() {
8797 let a = _mm256_set_epi64x(1, 2, 3, 4);
8798 let r = _mm256_maskz_cvtepi64_ps(0b0110, a);
8799 let e = _mm_set_ps(0., 2., 3., 0.);
8800 assert_eq_m128(r, e);
8801 }
8802
8803 #[simd_test(enable = "avx512dq")]
8804 fn test_mm512_cvtepi64_ps() {
8805 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8806 let r = _mm512_cvtepi64_ps(a);
8807 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8808 assert_eq_m256(r, e);
8809 }
8810
8811 #[simd_test(enable = "avx512dq")]
8812 fn test_mm512_mask_cvtepi64_ps() {
8813 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8814 let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
8815 let r = _mm512_mask_cvtepi64_ps(b, 0b01101001, a);
8816 let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
8817 assert_eq_m256(r, e);
8818 }
8819
8820 #[simd_test(enable = "avx512dq")]
8821 fn test_mm512_maskz_cvtepi64_ps() {
8822 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8823 let r = _mm512_maskz_cvtepi64_ps(0b01101001, a);
8824 let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
8825 assert_eq_m256(r, e);
8826 }
8827
8828 #[simd_test(enable = "avx512dq")]
8829 fn test_mm512_cvt_roundepu64_pd() {
8830 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8831 let r = _mm512_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
8832 let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8833 assert_eq_m512d(r, e);
8834 }
8835
8836 #[simd_test(enable = "avx512dq")]
8837 fn test_mm512_mask_cvt_roundepu64_pd() {
8838 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8839 let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
8840 let r = _mm512_mask_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8841 b, 0b01101001, a,
8842 );
8843 let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
8844 assert_eq_m512d(r, e);
8845 }
8846
8847 #[simd_test(enable = "avx512dq")]
8848 fn test_mm512_maskz_cvt_roundepu64_pd() {
8849 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8850 let r = _mm512_maskz_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8851 0b01101001, a,
8852 );
8853 let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
8854 assert_eq_m512d(r, e);
8855 }
8856
8857 #[simd_test(enable = "avx512dq,avx512vl")]
8858 fn test_mm_cvtepu64_pd() {
8859 let a = _mm_set_epi64x(1, 2);
8860 let r = _mm_cvtepu64_pd(a);
8861 let e = _mm_set_pd(1., 2.);
8862 assert_eq_m128d(r, e);
8863 }
8864
8865 #[simd_test(enable = "avx512dq,avx512vl")]
8866 fn test_mm_mask_cvtepu64_pd() {
8867 let a = _mm_set_epi64x(1, 2);
8868 let b = _mm_set_pd(3., 4.);
8869 let r = _mm_mask_cvtepu64_pd(b, 0b01, a);
8870 let e = _mm_set_pd(3., 2.);
8871 assert_eq_m128d(r, e);
8872 }
8873
8874 #[simd_test(enable = "avx512dq,avx512vl")]
8875 fn test_mm_maskz_cvtepu64_pd() {
8876 let a = _mm_set_epi64x(1, 2);
8877 let r = _mm_maskz_cvtepu64_pd(0b01, a);
8878 let e = _mm_set_pd(0., 2.);
8879 assert_eq_m128d(r, e);
8880 }
8881
8882 #[simd_test(enable = "avx512dq,avx512vl")]
8883 fn test_mm256_cvtepu64_pd() {
8884 let a = _mm256_set_epi64x(1, 2, 3, 4);
8885 let r = _mm256_cvtepu64_pd(a);
8886 let e = _mm256_set_pd(1., 2., 3., 4.);
8887 assert_eq_m256d(r, e);
8888 }
8889
8890 #[simd_test(enable = "avx512dq,avx512vl")]
8891 fn test_mm256_mask_cvtepu64_pd() {
8892 let a = _mm256_set_epi64x(1, 2, 3, 4);
8893 let b = _mm256_set_pd(5., 6., 7., 8.);
8894 let r = _mm256_mask_cvtepu64_pd(b, 0b0110, a);
8895 let e = _mm256_set_pd(5., 2., 3., 8.);
8896 assert_eq_m256d(r, e);
8897 }
8898
8899 #[simd_test(enable = "avx512dq,avx512vl")]
8900 fn test_mm256_maskz_cvtepu64_pd() {
8901 let a = _mm256_set_epi64x(1, 2, 3, 4);
8902 let r = _mm256_maskz_cvtepu64_pd(0b0110, a);
8903 let e = _mm256_set_pd(0., 2., 3., 0.);
8904 assert_eq_m256d(r, e);
8905 }
8906
8907 #[simd_test(enable = "avx512dq")]
8908 fn test_mm512_cvtepu64_pd() {
8909 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8910 let r = _mm512_cvtepu64_pd(a);
8911 let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8912 assert_eq_m512d(r, e);
8913 }
8914
8915 #[simd_test(enable = "avx512dq")]
8916 fn test_mm512_mask_cvtepu64_pd() {
8917 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8918 let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
8919 let r = _mm512_mask_cvtepu64_pd(b, 0b01101001, a);
8920 let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
8921 assert_eq_m512d(r, e);
8922 }
8923
8924 #[simd_test(enable = "avx512dq")]
8925 fn test_mm512_maskz_cvtepu64_pd() {
8926 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8927 let r = _mm512_maskz_cvtepu64_pd(0b01101001, a);
8928 let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
8929 assert_eq_m512d(r, e);
8930 }
8931
8932 #[simd_test(enable = "avx512dq")]
8933 fn test_mm512_cvt_roundepu64_ps() {
8934 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8935 let r = _mm512_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
8936 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8937 assert_eq_m256(r, e);
8938 }
8939
8940 #[simd_test(enable = "avx512dq")]
8941 fn test_mm512_mask_cvt_roundepu64_ps() {
8942 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8943 let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
8944 let r = _mm512_mask_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8945 b, 0b01101001, a,
8946 );
8947 let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
8948 assert_eq_m256(r, e);
8949 }
8950
8951 #[simd_test(enable = "avx512dq")]
8952 fn test_mm512_maskz_cvt_roundepu64_ps() {
8953 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8954 let r = _mm512_maskz_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8955 0b01101001, a,
8956 );
8957 let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
8958 assert_eq_m256(r, e);
8959 }
8960
8961 #[simd_test(enable = "avx512dq,avx512vl")]
8962 fn test_mm_cvtepu64_ps() {
8963 let a = _mm_set_epi64x(1, 2);
8964 let r = _mm_cvtepu64_ps(a);
8965 let e = _mm_set_ps(0., 0., 1., 2.);
8966 assert_eq_m128(r, e);
8967 }
8968
8969 #[simd_test(enable = "avx512dq,avx512vl")]
8970 fn test_mm_mask_cvtepu64_ps() {
8971 let a = _mm_set_epi64x(1, 2);
8972 let b = _mm_set_ps(3., 4., 5., 6.);
8973 let r = _mm_mask_cvtepu64_ps(b, 0b01, a);
8974 let e = _mm_set_ps(0., 0., 5., 2.);
8975 assert_eq_m128(r, e);
8976 }
8977
8978 #[simd_test(enable = "avx512dq,avx512vl")]
8979 fn test_mm_maskz_cvtepu64_ps() {
8980 let a = _mm_set_epi64x(1, 2);
8981 let r = _mm_maskz_cvtepu64_ps(0b01, a);
8982 let e = _mm_set_ps(0., 0., 0., 2.);
8983 assert_eq_m128(r, e);
8984 }
8985
8986 #[simd_test(enable = "avx512dq,avx512vl")]
8987 fn test_mm256_cvtepu64_ps() {
8988 let a = _mm256_set_epi64x(1, 2, 3, 4);
8989 let r = _mm256_cvtepu64_ps(a);
8990 let e = _mm_set_ps(1., 2., 3., 4.);
8991 assert_eq_m128(r, e);
8992 }
8993
8994 #[simd_test(enable = "avx512dq,avx512vl")]
8995 fn test_mm256_mask_cvtepu64_ps() {
8996 let a = _mm256_set_epi64x(1, 2, 3, 4);
8997 let b = _mm_set_ps(5., 6., 7., 8.);
8998 let r = _mm256_mask_cvtepu64_ps(b, 0b0110, a);
8999 let e = _mm_set_ps(5., 2., 3., 8.);
9000 assert_eq_m128(r, e);
9001 }
9002
9003 #[simd_test(enable = "avx512dq,avx512vl")]
9004 fn test_mm256_maskz_cvtepu64_ps() {
9005 let a = _mm256_set_epi64x(1, 2, 3, 4);
9006 let r = _mm256_maskz_cvtepu64_ps(0b0110, a);
9007 let e = _mm_set_ps(0., 2., 3., 0.);
9008 assert_eq_m128(r, e);
9009 }
9010
9011 #[simd_test(enable = "avx512dq")]
9012 fn test_mm512_cvtepu64_ps() {
9013 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9014 let r = _mm512_cvtepu64_ps(a);
9015 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9016 assert_eq_m256(r, e);
9017 }
9018
9019 #[simd_test(enable = "avx512dq")]
9020 fn test_mm512_mask_cvtepu64_ps() {
9021 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9022 let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
9023 let r = _mm512_mask_cvtepu64_ps(b, 0b01101001, a);
9024 let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
9025 assert_eq_m256(r, e);
9026 }
9027
9028 #[simd_test(enable = "avx512dq")]
9029 fn test_mm512_maskz_cvtepu64_ps() {
9030 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9031 let r = _mm512_maskz_cvtepu64_ps(0b01101001, a);
9032 let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
9033 assert_eq_m256(r, e);
9034 }
9035
9036 #[simd_test(enable = "avx512dq")]
9037 fn test_mm512_cvt_roundpd_epi64() {
9038 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9039 let r = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
9040 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9041 assert_eq_m512i(r, e);
9042 }
9043
9044 #[simd_test(enable = "avx512dq")]
9045 fn test_mm512_mask_cvt_roundpd_epi64() {
9046 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9047 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9048 let r = _mm512_mask_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
9049 b, 0b01101001, a,
9050 );
9051 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9052 assert_eq_m512i(r, e);
9053 }
9054
9055 #[simd_test(enable = "avx512dq")]
9056 fn test_mm512_maskz_cvt_roundpd_epi64() {
9057 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9058 let r = _mm512_maskz_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
9059 0b01101001, a,
9060 );
9061 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9062 assert_eq_m512i(r, e);
9063 }
9064
9065 #[simd_test(enable = "avx512dq,avx512vl")]
9066 fn test_mm_cvtpd_epi64() {
9067 let a = _mm_set_pd(1., 2.);
9068 let r = _mm_cvtpd_epi64(a);
9069 let e = _mm_set_epi64x(1, 2);
9070 assert_eq_m128i(r, e);
9071 }
9072
9073 #[simd_test(enable = "avx512dq,avx512vl")]
9074 fn test_mm_mask_cvtpd_epi64() {
9075 let a = _mm_set_pd(1., 2.);
9076 let b = _mm_set_epi64x(3, 4);
9077 let r = _mm_mask_cvtpd_epi64(b, 0b01, a);
9078 let e = _mm_set_epi64x(3, 2);
9079 assert_eq_m128i(r, e);
9080 }
9081
9082 #[simd_test(enable = "avx512dq,avx512vl")]
9083 fn test_mm_maskz_cvtpd_epi64() {
9084 let a = _mm_set_pd(1., 2.);
9085 let r = _mm_maskz_cvtpd_epi64(0b01, a);
9086 let e = _mm_set_epi64x(0, 2);
9087 assert_eq_m128i(r, e);
9088 }
9089
9090 #[simd_test(enable = "avx512dq,avx512vl")]
9091 fn test_mm256_cvtpd_epi64() {
9092 let a = _mm256_set_pd(1., 2., 3., 4.);
9093 let r = _mm256_cvtpd_epi64(a);
9094 let e = _mm256_set_epi64x(1, 2, 3, 4);
9095 assert_eq_m256i(r, e);
9096 }
9097
9098 #[simd_test(enable = "avx512dq,avx512vl")]
9099 fn test_mm256_mask_cvtpd_epi64() {
9100 let a = _mm256_set_pd(1., 2., 3., 4.);
9101 let b = _mm256_set_epi64x(5, 6, 7, 8);
9102 let r = _mm256_mask_cvtpd_epi64(b, 0b0110, a);
9103 let e = _mm256_set_epi64x(5, 2, 3, 8);
9104 assert_eq_m256i(r, e);
9105 }
9106
9107 #[simd_test(enable = "avx512dq,avx512vl")]
9108 fn test_mm256_maskz_cvtpd_epi64() {
9109 let a = _mm256_set_pd(1., 2., 3., 4.);
9110 let r = _mm256_maskz_cvtpd_epi64(0b0110, a);
9111 let e = _mm256_set_epi64x(0, 2, 3, 0);
9112 assert_eq_m256i(r, e);
9113 }
9114
9115 #[simd_test(enable = "avx512dq")]
9116 fn test_mm512_cvtpd_epi64() {
9117 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9118 let r = _mm512_cvtpd_epi64(a);
9119 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9120 assert_eq_m512i(r, e);
9121 }
9122
9123 #[simd_test(enable = "avx512dq")]
9124 fn test_mm512_mask_cvtpd_epi64() {
9125 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9126 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9127 let r = _mm512_mask_cvtpd_epi64(b, 0b01101001, a);
9128 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9129 assert_eq_m512i(r, e);
9130 }
9131
9132 #[simd_test(enable = "avx512dq")]
9133 fn test_mm512_maskz_cvtpd_epi64() {
9134 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9135 let r = _mm512_maskz_cvtpd_epi64(0b01101001, a);
9136 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9137 assert_eq_m512i(r, e);
9138 }
9139
9140 #[simd_test(enable = "avx512dq")]
9141 fn test_mm512_cvt_roundps_epi64() {
9142 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9143 let r = _mm512_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
9144 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9145 assert_eq_m512i(r, e);
9146 }
9147
9148 #[simd_test(enable = "avx512dq")]
9149 fn test_mm512_mask_cvt_roundps_epi64() {
9150 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9151 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9152 let r = _mm512_mask_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
9153 b, 0b01101001, a,
9154 );
9155 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9156 assert_eq_m512i(r, e);
9157 }
9158
9159 #[simd_test(enable = "avx512dq")]
9160 fn test_mm512_maskz_cvt_roundps_epi64() {
9161 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9162 let r = _mm512_maskz_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
9163 0b01101001, a,
9164 );
9165 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9166 assert_eq_m512i(r, e);
9167 }
9168
9169 #[simd_test(enable = "avx512dq,avx512vl")]
9170 fn test_mm_cvtps_epi64() {
9171 let a = _mm_set_ps(1., 2., 3., 4.);
9172 let r = _mm_cvtps_epi64(a);
9173 let e = _mm_set_epi64x(3, 4);
9174 assert_eq_m128i(r, e);
9175 }
9176
9177 #[simd_test(enable = "avx512dq,avx512vl")]
9178 fn test_mm_mask_cvtps_epi64() {
9179 let a = _mm_set_ps(1., 2., 3., 4.);
9180 let b = _mm_set_epi64x(5, 6);
9181 let r = _mm_mask_cvtps_epi64(b, 0b01, a);
9182 let e = _mm_set_epi64x(5, 4);
9183 assert_eq_m128i(r, e);
9184 }
9185
9186 #[simd_test(enable = "avx512dq,avx512vl")]
9187 fn test_mm_maskz_cvtps_epi64() {
9188 let a = _mm_set_ps(1., 2., 3., 4.);
9189 let r = _mm_maskz_cvtps_epi64(0b01, a);
9190 let e = _mm_set_epi64x(0, 4);
9191 assert_eq_m128i(r, e);
9192 }
9193
9194 #[simd_test(enable = "avx512dq,avx512vl")]
9195 fn test_mm256_cvtps_epi64() {
9196 let a = _mm_set_ps(1., 2., 3., 4.);
9197 let r = _mm256_cvtps_epi64(a);
9198 let e = _mm256_set_epi64x(1, 2, 3, 4);
9199 assert_eq_m256i(r, e);
9200 }
9201
9202 #[simd_test(enable = "avx512dq,avx512vl")]
9203 fn test_mm256_mask_cvtps_epi64() {
9204 let a = _mm_set_ps(1., 2., 3., 4.);
9205 let b = _mm256_set_epi64x(5, 6, 7, 8);
9206 let r = _mm256_mask_cvtps_epi64(b, 0b0110, a);
9207 let e = _mm256_set_epi64x(5, 2, 3, 8);
9208 assert_eq_m256i(r, e);
9209 }
9210
9211 #[simd_test(enable = "avx512dq,avx512vl")]
9212 fn test_mm256_maskz_cvtps_epi64() {
9213 let a = _mm_set_ps(1., 2., 3., 4.);
9214 let r = _mm256_maskz_cvtps_epi64(0b0110, a);
9215 let e = _mm256_set_epi64x(0, 2, 3, 0);
9216 assert_eq_m256i(r, e);
9217 }
9218
9219 #[simd_test(enable = "avx512dq")]
9220 fn test_mm512_cvtps_epi64() {
9221 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9222 let r = _mm512_cvtps_epi64(a);
9223 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9224 assert_eq_m512i(r, e);
9225 }
9226
9227 #[simd_test(enable = "avx512dq")]
9228 fn test_mm512_mask_cvtps_epi64() {
9229 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9230 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9231 let r = _mm512_mask_cvtps_epi64(b, 0b01101001, a);
9232 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9233 assert_eq_m512i(r, e);
9234 }
9235
9236 #[simd_test(enable = "avx512dq")]
9237 fn test_mm512_maskz_cvtps_epi64() {
9238 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9239 let r = _mm512_maskz_cvtps_epi64(0b01101001, a);
9240 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9241 assert_eq_m512i(r, e);
9242 }
9243
9244 #[simd_test(enable = "avx512dq")]
9245 fn test_mm512_cvt_roundpd_epu64() {
9246 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9247 let r = _mm512_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
9248 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9249 assert_eq_m512i(r, e);
9250 }
9251
9252 #[simd_test(enable = "avx512dq")]
9253 fn test_mm512_mask_cvt_roundpd_epu64() {
9254 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9255 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9256 let r = _mm512_mask_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
9257 b, 0b01101001, a,
9258 );
9259 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9260 assert_eq_m512i(r, e);
9261 }
9262
9263 #[simd_test(enable = "avx512dq")]
9264 fn test_mm512_maskz_cvt_roundpd_epu64() {
9265 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9266 let r = _mm512_maskz_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
9267 0b01101001, a,
9268 );
9269 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9270 assert_eq_m512i(r, e);
9271 }
9272
9273 #[simd_test(enable = "avx512dq,avx512vl")]
9274 fn test_mm_cvtpd_epu64() {
9275 let a = _mm_set_pd(1., 2.);
9276 let r = _mm_cvtpd_epu64(a);
9277 let e = _mm_set_epi64x(1, 2);
9278 assert_eq_m128i(r, e);
9279 }
9280
9281 #[simd_test(enable = "avx512dq,avx512vl")]
9282 fn test_mm_mask_cvtpd_epu64() {
9283 let a = _mm_set_pd(1., 2.);
9284 let b = _mm_set_epi64x(3, 4);
9285 let r = _mm_mask_cvtpd_epu64(b, 0b01, a);
9286 let e = _mm_set_epi64x(3, 2);
9287 assert_eq_m128i(r, e);
9288 }
9289
9290 #[simd_test(enable = "avx512dq,avx512vl")]
9291 fn test_mm_maskz_cvtpd_epu64() {
9292 let a = _mm_set_pd(1., 2.);
9293 let r = _mm_maskz_cvtpd_epu64(0b01, a);
9294 let e = _mm_set_epi64x(0, 2);
9295 assert_eq_m128i(r, e);
9296 }
9297
9298 #[simd_test(enable = "avx512dq,avx512vl")]
9299 fn test_mm256_cvtpd_epu64() {
9300 let a = _mm256_set_pd(1., 2., 3., 4.);
9301 let r = _mm256_cvtpd_epu64(a);
9302 let e = _mm256_set_epi64x(1, 2, 3, 4);
9303 assert_eq_m256i(r, e);
9304 }
9305
9306 #[simd_test(enable = "avx512dq,avx512vl")]
9307 fn test_mm256_mask_cvtpd_epu64() {
9308 let a = _mm256_set_pd(1., 2., 3., 4.);
9309 let b = _mm256_set_epi64x(5, 6, 7, 8);
9310 let r = _mm256_mask_cvtpd_epu64(b, 0b0110, a);
9311 let e = _mm256_set_epi64x(5, 2, 3, 8);
9312 assert_eq_m256i(r, e);
9313 }
9314
9315 #[simd_test(enable = "avx512dq,avx512vl")]
9316 fn test_mm256_maskz_cvtpd_epu64() {
9317 let a = _mm256_set_pd(1., 2., 3., 4.);
9318 let r = _mm256_maskz_cvtpd_epu64(0b0110, a);
9319 let e = _mm256_set_epi64x(0, 2, 3, 0);
9320 assert_eq_m256i(r, e);
9321 }
9322
9323 #[simd_test(enable = "avx512dq")]
9324 fn test_mm512_cvtpd_epu64() {
9325 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9326 let r = _mm512_cvtpd_epu64(a);
9327 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9328 assert_eq_m512i(r, e);
9329 }
9330
9331 #[simd_test(enable = "avx512dq")]
9332 fn test_mm512_mask_cvtpd_epu64() {
9333 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9334 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9335 let r = _mm512_mask_cvtpd_epu64(b, 0b01101001, a);
9336 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9337 assert_eq_m512i(r, e);
9338 }
9339
9340 #[simd_test(enable = "avx512dq")]
9341 fn test_mm512_maskz_cvtpd_epu64() {
9342 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9343 let r = _mm512_maskz_cvtpd_epu64(0b01101001, a);
9344 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9345 assert_eq_m512i(r, e);
9346 }
9347
9348 #[simd_test(enable = "avx512dq")]
9349 fn test_mm512_cvt_roundps_epu64() {
9350 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9351 let r = _mm512_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
9352 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9353 assert_eq_m512i(r, e);
9354 }
9355
9356 #[simd_test(enable = "avx512dq")]
9357 fn test_mm512_mask_cvt_roundps_epu64() {
9358 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9359 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9360 let r = _mm512_mask_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
9361 b, 0b01101001, a,
9362 );
9363 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9364 assert_eq_m512i(r, e);
9365 }
9366
9367 #[simd_test(enable = "avx512dq")]
9368 fn test_mm512_maskz_cvt_roundps_epu64() {
9369 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9370 let r = _mm512_maskz_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
9371 0b01101001, a,
9372 );
9373 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9374 assert_eq_m512i(r, e);
9375 }
9376
9377 #[simd_test(enable = "avx512dq,avx512vl")]
9378 fn test_mm_cvtps_epu64() {
9379 let a = _mm_set_ps(1., 2., 3., 4.);
9380 let r = _mm_cvtps_epu64(a);
9381 let e = _mm_set_epi64x(3, 4);
9382 assert_eq_m128i(r, e);
9383 }
9384
9385 #[simd_test(enable = "avx512dq,avx512vl")]
9386 fn test_mm_mask_cvtps_epu64() {
9387 let a = _mm_set_ps(1., 2., 3., 4.);
9388 let b = _mm_set_epi64x(5, 6);
9389 let r = _mm_mask_cvtps_epu64(b, 0b01, a);
9390 let e = _mm_set_epi64x(5, 4);
9391 assert_eq_m128i(r, e);
9392 }
9393
9394 #[simd_test(enable = "avx512dq,avx512vl")]
9395 fn test_mm_maskz_cvtps_epu64() {
9396 let a = _mm_set_ps(1., 2., 3., 4.);
9397 let r = _mm_maskz_cvtps_epu64(0b01, a);
9398 let e = _mm_set_epi64x(0, 4);
9399 assert_eq_m128i(r, e);
9400 }
9401
9402 #[simd_test(enable = "avx512dq,avx512vl")]
9403 fn test_mm256_cvtps_epu64() {
9404 let a = _mm_set_ps(1., 2., 3., 4.);
9405 let r = _mm256_cvtps_epu64(a);
9406 let e = _mm256_set_epi64x(1, 2, 3, 4);
9407 assert_eq_m256i(r, e);
9408 }
9409
9410 #[simd_test(enable = "avx512dq,avx512vl")]
9411 fn test_mm256_mask_cvtps_epu64() {
9412 let a = _mm_set_ps(1., 2., 3., 4.);
9413 let b = _mm256_set_epi64x(5, 6, 7, 8);
9414 let r = _mm256_mask_cvtps_epu64(b, 0b0110, a);
9415 let e = _mm256_set_epi64x(5, 2, 3, 8);
9416 assert_eq_m256i(r, e);
9417 }
9418
9419 #[simd_test(enable = "avx512dq,avx512vl")]
9420 fn test_mm256_maskz_cvtps_epu64() {
9421 let a = _mm_set_ps(1., 2., 3., 4.);
9422 let r = _mm256_maskz_cvtps_epu64(0b0110, a);
9423 let e = _mm256_set_epi64x(0, 2, 3, 0);
9424 assert_eq_m256i(r, e);
9425 }
9426
9427 #[simd_test(enable = "avx512dq")]
9428 fn test_mm512_cvtps_epu64() {
9429 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9430 let r = _mm512_cvtps_epu64(a);
9431 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9432 assert_eq_m512i(r, e);
9433 }
9434
9435 #[simd_test(enable = "avx512dq")]
9436 fn test_mm512_mask_cvtps_epu64() {
9437 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9438 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9439 let r = _mm512_mask_cvtps_epu64(b, 0b01101001, a);
9440 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9441 assert_eq_m512i(r, e);
9442 }
9443
9444 #[simd_test(enable = "avx512dq")]
9445 fn test_mm512_maskz_cvtps_epu64() {
9446 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9447 let r = _mm512_maskz_cvtps_epu64(0b01101001, a);
9448 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9449 assert_eq_m512i(r, e);
9450 }
9451
9452 #[simd_test(enable = "avx512dq")]
9453 fn test_mm512_cvtt_roundpd_epi64() {
9454 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9455 let r = _mm512_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(a);
9456 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9457 assert_eq_m512i(r, e);
9458 }
9459
9460 #[simd_test(enable = "avx512dq")]
9461 fn test_mm512_mask_cvtt_roundpd_epi64() {
9462 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9463 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9464 let r = _mm512_mask_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
9465 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9466 assert_eq_m512i(r, e);
9467 }
9468
9469 #[simd_test(enable = "avx512dq")]
9470 fn test_mm512_maskz_cvtt_roundpd_epi64() {
9471 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9472 let r = _mm512_maskz_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a);
9473 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9474 assert_eq_m512i(r, e);
9475 }
9476
9477 #[simd_test(enable = "avx512dq,avx512vl")]
9478 fn test_mm_cvttpd_epi64() {
9479 let a = _mm_set_pd(1., 2.);
9480 let r = _mm_cvttpd_epi64(a);
9481 let e = _mm_set_epi64x(1, 2);
9482 assert_eq_m128i(r, e);
9483 }
9484
9485 #[simd_test(enable = "avx512dq,avx512vl")]
9486 fn test_mm_mask_cvttpd_epi64() {
9487 let a = _mm_set_pd(1., 2.);
9488 let b = _mm_set_epi64x(3, 4);
9489 let r = _mm_mask_cvttpd_epi64(b, 0b01, a);
9490 let e = _mm_set_epi64x(3, 2);
9491 assert_eq_m128i(r, e);
9492 }
9493
9494 #[simd_test(enable = "avx512dq,avx512vl")]
9495 fn test_mm_maskz_cvttpd_epi64() {
9496 let a = _mm_set_pd(1., 2.);
9497 let r = _mm_maskz_cvttpd_epi64(0b01, a);
9498 let e = _mm_set_epi64x(0, 2);
9499 assert_eq_m128i(r, e);
9500 }
9501
9502 #[simd_test(enable = "avx512dq,avx512vl")]
9503 fn test_mm256_cvttpd_epi64() {
9504 let a = _mm256_set_pd(1., 2., 3., 4.);
9505 let r = _mm256_cvttpd_epi64(a);
9506 let e = _mm256_set_epi64x(1, 2, 3, 4);
9507 assert_eq_m256i(r, e);
9508 }
9509
9510 #[simd_test(enable = "avx512dq,avx512vl")]
9511 fn test_mm256_mask_cvttpd_epi64() {
9512 let a = _mm256_set_pd(1., 2., 3., 4.);
9513 let b = _mm256_set_epi64x(5, 6, 7, 8);
9514 let r = _mm256_mask_cvttpd_epi64(b, 0b0110, a);
9515 let e = _mm256_set_epi64x(5, 2, 3, 8);
9516 assert_eq_m256i(r, e);
9517 }
9518
9519 #[simd_test(enable = "avx512dq,avx512vl")]
9520 fn test_mm256_maskz_cvttpd_epi64() {
9521 let a = _mm256_set_pd(1., 2., 3., 4.);
9522 let r = _mm256_maskz_cvttpd_epi64(0b0110, a);
9523 let e = _mm256_set_epi64x(0, 2, 3, 0);
9524 assert_eq_m256i(r, e);
9525 }
9526
9527 #[simd_test(enable = "avx512dq")]
9528 fn test_mm512_cvttpd_epi64() {
9529 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9530 let r = _mm512_cvttpd_epi64(a);
9531 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9532 assert_eq_m512i(r, e);
9533 }
9534
9535 #[simd_test(enable = "avx512dq")]
9536 fn test_mm512_mask_cvttpd_epi64() {
9537 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9538 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9539 let r = _mm512_mask_cvttpd_epi64(b, 0b01101001, a);
9540 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9541 assert_eq_m512i(r, e);
9542 }
9543
9544 #[simd_test(enable = "avx512dq")]
9545 fn test_mm512_maskz_cvttpd_epi64() {
9546 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9547 let r = _mm512_maskz_cvttpd_epi64(0b01101001, a);
9548 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9549 assert_eq_m512i(r, e);
9550 }
9551
9552 #[simd_test(enable = "avx512dq")]
9553 fn test_mm512_cvtt_roundps_epi64() {
9554 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9555 let r = _mm512_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(a);
9556 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9557 assert_eq_m512i(r, e);
9558 }
9559
9560 #[simd_test(enable = "avx512dq")]
9561 fn test_mm512_mask_cvtt_roundps_epi64() {
9562 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9563 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9564 let r = _mm512_mask_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
9565 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9566 assert_eq_m512i(r, e);
9567 }
9568
9569 #[simd_test(enable = "avx512dq")]
9570 fn test_mm512_maskz_cvtt_roundps_epi64() {
9571 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9572 let r = _mm512_maskz_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a);
9573 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9574 assert_eq_m512i(r, e);
9575 }
9576
9577 #[simd_test(enable = "avx512dq,avx512vl")]
9578 fn test_mm_cvttps_epi64() {
9579 let a = _mm_set_ps(1., 2., 3., 4.);
9580 let r = _mm_cvttps_epi64(a);
9581 let e = _mm_set_epi64x(3, 4);
9582 assert_eq_m128i(r, e);
9583 }
9584
9585 #[simd_test(enable = "avx512dq,avx512vl")]
9586 fn test_mm_mask_cvttps_epi64() {
9587 let a = _mm_set_ps(1., 2., 3., 4.);
9588 let b = _mm_set_epi64x(5, 6);
9589 let r = _mm_mask_cvttps_epi64(b, 0b01, a);
9590 let e = _mm_set_epi64x(5, 4);
9591 assert_eq_m128i(r, e);
9592 }
9593
9594 #[simd_test(enable = "avx512dq,avx512vl")]
9595 fn test_mm_maskz_cvttps_epi64() {
9596 let a = _mm_set_ps(1., 2., 3., 4.);
9597 let r = _mm_maskz_cvttps_epi64(0b01, a);
9598 let e = _mm_set_epi64x(0, 4);
9599 assert_eq_m128i(r, e);
9600 }
9601
9602 #[simd_test(enable = "avx512dq,avx512vl")]
9603 fn test_mm256_cvttps_epi64() {
9604 let a = _mm_set_ps(1., 2., 3., 4.);
9605 let r = _mm256_cvttps_epi64(a);
9606 let e = _mm256_set_epi64x(1, 2, 3, 4);
9607 assert_eq_m256i(r, e);
9608 }
9609
9610 #[simd_test(enable = "avx512dq,avx512vl")]
9611 fn test_mm256_mask_cvttps_epi64() {
9612 let a = _mm_set_ps(1., 2., 3., 4.);
9613 let b = _mm256_set_epi64x(5, 6, 7, 8);
9614 let r = _mm256_mask_cvttps_epi64(b, 0b0110, a);
9615 let e = _mm256_set_epi64x(5, 2, 3, 8);
9616 assert_eq_m256i(r, e);
9617 }
9618
9619 #[simd_test(enable = "avx512dq,avx512vl")]
9620 fn test_mm256_maskz_cvttps_epi64() {
9621 let a = _mm_set_ps(1., 2., 3., 4.);
9622 let r = _mm256_maskz_cvttps_epi64(0b0110, a);
9623 let e = _mm256_set_epi64x(0, 2, 3, 0);
9624 assert_eq_m256i(r, e);
9625 }
9626
9627 #[simd_test(enable = "avx512dq")]
9628 fn test_mm512_cvttps_epi64() {
9629 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9630 let r = _mm512_cvttps_epi64(a);
9631 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9632 assert_eq_m512i(r, e);
9633 }
9634
9635 #[simd_test(enable = "avx512dq")]
9636 fn test_mm512_mask_cvttps_epi64() {
9637 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9638 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9639 let r = _mm512_mask_cvttps_epi64(b, 0b01101001, a);
9640 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9641 assert_eq_m512i(r, e);
9642 }
9643
9644 #[simd_test(enable = "avx512dq")]
9645 fn test_mm512_maskz_cvttps_epi64() {
9646 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9647 let r = _mm512_maskz_cvttps_epi64(0b01101001, a);
9648 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9649 assert_eq_m512i(r, e);
9650 }
9651
9652 #[simd_test(enable = "avx512dq")]
9653 fn test_mm512_cvtt_roundpd_epu64() {
9654 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9655 let r = _mm512_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(a);
9656 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9657 assert_eq_m512i(r, e);
9658 }
9659
9660 #[simd_test(enable = "avx512dq")]
9661 fn test_mm512_mask_cvtt_roundpd_epu64() {
9662 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9663 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9664 let r = _mm512_mask_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
9665 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9666 assert_eq_m512i(r, e);
9667 }
9668
9669 #[simd_test(enable = "avx512dq")]
9670 fn test_mm512_maskz_cvtt_roundpd_epu64() {
9671 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9672 let r = _mm512_maskz_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a);
9673 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9674 assert_eq_m512i(r, e);
9675 }
9676
9677 #[simd_test(enable = "avx512dq,avx512vl")]
9678 fn test_mm_cvttpd_epu64() {
9679 let a = _mm_set_pd(1., 2.);
9680 let r = _mm_cvttpd_epu64(a);
9681 let e = _mm_set_epi64x(1, 2);
9682 assert_eq_m128i(r, e);
9683 }
9684
9685 #[simd_test(enable = "avx512dq,avx512vl")]
9686 fn test_mm_mask_cvttpd_epu64() {
9687 let a = _mm_set_pd(1., 2.);
9688 let b = _mm_set_epi64x(3, 4);
9689 let r = _mm_mask_cvttpd_epu64(b, 0b01, a);
9690 let e = _mm_set_epi64x(3, 2);
9691 assert_eq_m128i(r, e);
9692 }
9693
9694 #[simd_test(enable = "avx512dq,avx512vl")]
9695 fn test_mm_maskz_cvttpd_epu64() {
9696 let a = _mm_set_pd(1., 2.);
9697 let r = _mm_maskz_cvttpd_epu64(0b01, a);
9698 let e = _mm_set_epi64x(0, 2);
9699 assert_eq_m128i(r, e);
9700 }
9701
9702 #[simd_test(enable = "avx512dq,avx512vl")]
9703 fn test_mm256_cvttpd_epu64() {
9704 let a = _mm256_set_pd(1., 2., 3., 4.);
9705 let r = _mm256_cvttpd_epu64(a);
9706 let e = _mm256_set_epi64x(1, 2, 3, 4);
9707 assert_eq_m256i(r, e);
9708 }
9709
9710 #[simd_test(enable = "avx512dq,avx512vl")]
9711 fn test_mm256_mask_cvttpd_epu64() {
9712 let a = _mm256_set_pd(1., 2., 3., 4.);
9713 let b = _mm256_set_epi64x(5, 6, 7, 8);
9714 let r = _mm256_mask_cvttpd_epu64(b, 0b0110, a);
9715 let e = _mm256_set_epi64x(5, 2, 3, 8);
9716 assert_eq_m256i(r, e);
9717 }
9718
9719 #[simd_test(enable = "avx512dq,avx512vl")]
9720 fn test_mm256_maskz_cvttpd_epu64() {
9721 let a = _mm256_set_pd(1., 2., 3., 4.);
9722 let r = _mm256_maskz_cvttpd_epu64(0b0110, a);
9723 let e = _mm256_set_epi64x(0, 2, 3, 0);
9724 assert_eq_m256i(r, e);
9725 }
9726
9727 #[simd_test(enable = "avx512dq")]
9728 fn test_mm512_cvttpd_epu64() {
9729 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9730 let r = _mm512_cvttpd_epu64(a);
9731 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9732 assert_eq_m512i(r, e);
9733 }
9734
9735 #[simd_test(enable = "avx512dq")]
9736 fn test_mm512_mask_cvttpd_epu64() {
9737 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9738 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9739 let r = _mm512_mask_cvttpd_epu64(b, 0b01101001, a);
9740 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9741 assert_eq_m512i(r, e);
9742 }
9743
9744 #[simd_test(enable = "avx512dq")]
9745 fn test_mm512_maskz_cvttpd_epu64() {
9746 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9747 let r = _mm512_maskz_cvttpd_epu64(0b01101001, a);
9748 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9749 assert_eq_m512i(r, e);
9750 }
9751
9752 #[simd_test(enable = "avx512dq")]
9753 fn test_mm512_cvtt_roundps_epu64() {
9754 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9755 let r = _mm512_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(a);
9756 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9757 assert_eq_m512i(r, e);
9758 }
9759
9760 #[simd_test(enable = "avx512dq")]
9761 fn test_mm512_mask_cvtt_roundps_epu64() {
9762 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9763 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9764 let r = _mm512_mask_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
9765 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9766 assert_eq_m512i(r, e);
9767 }
9768
9769 #[simd_test(enable = "avx512dq")]
9770 fn test_mm512_maskz_cvtt_roundps_epu64() {
9771 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9772 let r = _mm512_maskz_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a);
9773 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9774 assert_eq_m512i(r, e);
9775 }
9776
9777 #[simd_test(enable = "avx512dq,avx512vl")]
9778 fn test_mm_cvttps_epu64() {
9779 let a = _mm_set_ps(1., 2., 3., 4.);
9780 let r = _mm_cvttps_epu64(a);
9781 let e = _mm_set_epi64x(3, 4);
9782 assert_eq_m128i(r, e);
9783 }
9784
9785 #[simd_test(enable = "avx512dq,avx512vl")]
9786 fn test_mm_mask_cvttps_epu64() {
9787 let a = _mm_set_ps(1., 2., 3., 4.);
9788 let b = _mm_set_epi64x(5, 6);
9789 let r = _mm_mask_cvttps_epu64(b, 0b01, a);
9790 let e = _mm_set_epi64x(5, 4);
9791 assert_eq_m128i(r, e);
9792 }
9793
9794 #[simd_test(enable = "avx512dq,avx512vl")]
9795 fn test_mm_maskz_cvttps_epu64() {
9796 let a = _mm_set_ps(1., 2., 3., 4.);
9797 let r = _mm_maskz_cvttps_epu64(0b01, a);
9798 let e = _mm_set_epi64x(0, 4);
9799 assert_eq_m128i(r, e);
9800 }
9801
9802 #[simd_test(enable = "avx512dq,avx512vl")]
9803 fn test_mm256_cvttps_epu64() {
9804 let a = _mm_set_ps(1., 2., 3., 4.);
9805 let r = _mm256_cvttps_epu64(a);
9806 let e = _mm256_set_epi64x(1, 2, 3, 4);
9807 assert_eq_m256i(r, e);
9808 }
9809
9810 #[simd_test(enable = "avx512dq,avx512vl")]
9811 fn test_mm256_mask_cvttps_epu64() {
9812 let a = _mm_set_ps(1., 2., 3., 4.);
9813 let b = _mm256_set_epi64x(5, 6, 7, 8);
9814 let r = _mm256_mask_cvttps_epu64(b, 0b0110, a);
9815 let e = _mm256_set_epi64x(5, 2, 3, 8);
9816 assert_eq_m256i(r, e);
9817 }
9818
9819 #[simd_test(enable = "avx512dq,avx512vl")]
9820 fn test_mm256_maskz_cvttps_epu64() {
9821 let a = _mm_set_ps(1., 2., 3., 4.);
9822 let r = _mm256_maskz_cvttps_epu64(0b0110, a);
9823 let e = _mm256_set_epi64x(0, 2, 3, 0);
9824 assert_eq_m256i(r, e);
9825 }
9826
9827 #[simd_test(enable = "avx512dq")]
9828 fn test_mm512_cvttps_epu64() {
9829 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9830 let r = _mm512_cvttps_epu64(a);
9831 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9832 assert_eq_m512i(r, e);
9833 }
9834
9835 #[simd_test(enable = "avx512dq")]
9836 fn test_mm512_mask_cvttps_epu64() {
9837 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9838 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9839 let r = _mm512_mask_cvttps_epu64(b, 0b01101001, a);
9840 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9841 assert_eq_m512i(r, e);
9842 }
9843
9844 #[simd_test(enable = "avx512dq")]
9845 fn test_mm512_maskz_cvttps_epu64() {
9846 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9847 let r = _mm512_maskz_cvttps_epu64(0b01101001, a);
9848 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9849 assert_eq_m512i(r, e);
9850 }
9851
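// `_mm*_mullo_epi64` multiplies packed 64-bit integers, keeping the low 64 bits of
// each product. These tests (like the mask-register tests further down) are written as
// `const fn`, matching the const-callable intrinsic definitions (cf. the
// `rustc_const_unstable(feature = "stdarch_const_x86")` attributes used in this module).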
9852 #[simd_test(enable = "avx512dq,avx512vl")]
9853 const fn test_mm_mullo_epi64() {
9854 let a = _mm_set_epi64x(1, 2);
9855 let b = _mm_set_epi64x(3, 4);
9856 let r = _mm_mullo_epi64(a, b);
9857 let e = _mm_set_epi64x(3, 8);
9858 assert_eq_m128i(r, e);
9859 }
9860
9861 #[simd_test(enable = "avx512dq,avx512vl")]
9862 const fn test_mm_mask_mullo_epi64() {
9863 let a = _mm_set_epi64x(1, 2);
9864 let b = _mm_set_epi64x(3, 4);
9865 let c = _mm_set_epi64x(5, 6);
9866 let r = _mm_mask_mullo_epi64(c, 0b01, a, b);
9867 let e = _mm_set_epi64x(5, 8);
9868 assert_eq_m128i(r, e);
9869 }
9870
9871 #[simd_test(enable = "avx512dq,avx512vl")]
9872 const fn test_mm_maskz_mullo_epi64() {
9873 let a = _mm_set_epi64x(1, 2);
9874 let b = _mm_set_epi64x(3, 4);
9875 let r = _mm_maskz_mullo_epi64(0b01, a, b);
9876 let e = _mm_set_epi64x(0, 8);
9877 assert_eq_m128i(r, e);
9878 }
9879
9880 #[simd_test(enable = "avx512dq,avx512vl")]
9881 const fn test_mm256_mullo_epi64() {
9882 let a = _mm256_set_epi64x(1, 2, 3, 4);
9883 let b = _mm256_set_epi64x(5, 6, 7, 8);
9884 let r = _mm256_mullo_epi64(a, b);
9885 let e = _mm256_set_epi64x(5, 12, 21, 32);
9886 assert_eq_m256i(r, e);
9887 }
9888
9889 #[simd_test(enable = "avx512dq,avx512vl")]
9890 const fn test_mm256_mask_mullo_epi64() {
9891 let a = _mm256_set_epi64x(1, 2, 3, 4);
9892 let b = _mm256_set_epi64x(5, 6, 7, 8);
9893 let c = _mm256_set_epi64x(9, 10, 11, 12);
9894 let r = _mm256_mask_mullo_epi64(c, 0b0110, a, b);
9895 let e = _mm256_set_epi64x(9, 12, 21, 12);
9896 assert_eq_m256i(r, e);
9897 }
9898
9899 #[simd_test(enable = "avx512dq,avx512vl")]
9900 const fn test_mm256_maskz_mullo_epi64() {
9901 let a = _mm256_set_epi64x(1, 2, 3, 4);
9902 let b = _mm256_set_epi64x(5, 6, 7, 8);
9903 let r = _mm256_maskz_mullo_epi64(0b0110, a, b);
9904 let e = _mm256_set_epi64x(0, 12, 21, 0);
9905 assert_eq_m256i(r, e);
9906 }
9907
9908 #[simd_test(enable = "avx512dq")]
9909 const fn test_mm512_mullo_epi64() {
9910 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9911 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9912 let r = _mm512_mullo_epi64(a, b);
9913 let e = _mm512_set_epi64(9, 20, 33, 48, 65, 84, 105, 128);
9914 assert_eq_m512i(r, e);
9915 }
9916
9917 #[simd_test(enable = "avx512dq")]
9918 const fn test_mm512_mask_mullo_epi64() {
9919 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9920 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9921 let c = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24);
9922 let r = _mm512_mask_mullo_epi64(c, 0b01101001, a, b);
9923 let e = _mm512_set_epi64(17, 20, 33, 20, 65, 22, 23, 128);
9924 assert_eq_m512i(r, e);
9925 }
9926
9927 #[simd_test(enable = "avx512dq")]
9928 const fn test_mm512_maskz_mullo_epi64() {
9929 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9930 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9931 let r = _mm512_maskz_mullo_epi64(0b01101001, a, b);
9932 let e = _mm512_set_epi64(0, 20, 33, 0, 65, 0, 0, 128);
9933 assert_eq_m512i(r, e);
9934 }
9935
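// The `_k*` intrinsics below operate directly on mask registers. `_cvtmask8_u32` /
// `_cvtu32_mask8` convert between a mask and a `u32`, and `_kadd_mask8` is a plain
// addition of the two masks, e.g. 98 + 117 = 215 below.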
9936 #[simd_test(enable = "avx512dq")]
9937 const fn test_cvtmask8_u32() {
9938 let a: __mmask8 = 0b01101001;
9939 let r = _cvtmask8_u32(a);
9940 let e: u32 = 0b01101001;
9941 assert_eq!(r, e);
9942 }
9943
9944 #[simd_test(enable = "avx512dq")]
9945 const fn test_cvtu32_mask8() {
9946 let a: u32 = 0b01101001;
9947 let r = _cvtu32_mask8(a);
9948 let e: __mmask8 = 0b01101001;
9949 assert_eq!(r, e);
9950 }
9951
9952 #[simd_test(enable = "avx512dq")]
9953 const fn test_kadd_mask16() {
9954 let a: __mmask16 = 27549;
9955 let b: __mmask16 = 23434;
9956 let r = _kadd_mask16(a, b);
9957 let e: __mmask16 = 50983;
9958 assert_eq!(r, e);
9959 }
9960
9961 #[simd_test(enable = "avx512dq")]
9962 const fn test_kadd_mask8() {
9963 let a: __mmask8 = 98;
9964 let b: __mmask8 = 117;
9965 let r = _kadd_mask8(a, b);
9966 let e: __mmask8 = 215;
9967 assert_eq!(r, e);
9968 }
9969
9970 #[simd_test(enable = "avx512dq")]
9971 const fn test_kand_mask8() {
9972 let a: __mmask8 = 0b01101001;
9973 let b: __mmask8 = 0b10110011;
9974 let r = _kand_mask8(a, b);
9975 let e: __mmask8 = 0b00100001;
9976 assert_eq!(r, e);
9977 }
9978
9979 #[simd_test(enable = "avx512dq")]
9980 const fn test_kandn_mask8() {
9981 let a: __mmask8 = 0b01101001;
9982 let b: __mmask8 = 0b10110011;
9983 let r = _kandn_mask8(a, b);
9984 let e: __mmask8 = 0b10010010;
9985 assert_eq!(r, e);
9986 }
9987
9988 #[simd_test(enable = "avx512dq")]
9989 const fn test_knot_mask8() {
9990 let a: __mmask8 = 0b01101001;
9991 let r = _knot_mask8(a);
9992 let e: __mmask8 = 0b10010110;
9993 assert_eq!(r, e);
9994 }
9995
9996 #[simd_test(enable = "avx512dq")]
9997 const fn test_kor_mask8() {
9998 let a: __mmask8 = 0b01101001;
9999 let b: __mmask8 = 0b10110011;
10000 let r = _kor_mask8(a, b);
10001 let e: __mmask8 = 0b11111011;
10002 assert_eq!(r, e);
10003 }
10004
10005 #[simd_test(enable = "avx512dq")]
10006 const fn test_kxnor_mask8() {
10007 let a: __mmask8 = 0b01101001;
10008 let b: __mmask8 = 0b10110011;
10009 let r = _kxnor_mask8(a, b);
10010 let e: __mmask8 = 0b00100101;
10011 assert_eq!(r, e);
10012 }
10013
10014 #[simd_test(enable = "avx512dq")]
10015 const fn test_kxor_mask8() {
10016 let a: __mmask8 = 0b01101001;
10017 let b: __mmask8 = 0b10110011;
10018 let r = _kxor_mask8(a, b);
10019 let e: __mmask8 = 0b11011010;
10020 assert_eq!(r, e);
10021 }
10022
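// `_kortest_mask8_u8` ORs the two masks: the return value is 1 only when the OR is all
// zeros, and `all_ones` is set to 1 when it is all ones. Here 0b01101001 | 0b10110110
// == 0b11111111, so r == 0 and all_ones == 1; `_kortestc` / `_kortestz` expose the two
// flags individually.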
10023 #[simd_test(enable = "avx512dq")]
10024 const fn test_kortest_mask8_u8() {
10025 let a: __mmask8 = 0b01101001;
10026 let b: __mmask8 = 0b10110110;
10027 let mut all_ones: u8 = 0;
10028 let r = unsafe { _kortest_mask8_u8(a, b, &mut all_ones) };
10029 assert_eq!(r, 0);
10030 assert_eq!(all_ones, 1);
10031 }
10032
10033 #[simd_test(enable = "avx512dq")]
10034 const fn test_kortestc_mask8_u8() {
10035 let a: __mmask8 = 0b01101001;
10036 let b: __mmask8 = 0b10110110;
10037 let r = _kortestc_mask8_u8(a, b);
10038 assert_eq!(r, 1);
10039 }
10040
10041 #[simd_test(enable = "avx512dq")]
10042 const fn test_kortestz_mask8_u8() {
10043 let a: __mmask8 = 0b01101001;
10044 let b: __mmask8 = 0b10110110;
10045 let r = _kortestz_mask8_u8(a, b);
10046 assert_eq!(r, 0);
10047 }
10048
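// For an 8-bit mask, shift counts of 8 or more produce an all-zero mask, which the
// `<8>` and `<9>` cases below exercise alongside the in-range counts.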
10049 #[simd_test(enable = "avx512dq")]
10050 const fn test_kshiftli_mask8() {
10051 let a: __mmask8 = 0b01101001;
10052 let r = _kshiftli_mask8::<3>(a);
10053 let e: __mmask8 = 0b01001000;
10054 assert_eq!(r, e);
10055
10056 let r = _kshiftli_mask8::<7>(a);
10057 let e: __mmask8 = 0b10000000;
10058 assert_eq!(r, e);
10059
10060 let r = _kshiftli_mask8::<8>(a);
10061 let e: __mmask8 = 0b00000000;
10062 assert_eq!(r, e);
10063
10064 let r = _kshiftli_mask8::<9>(a);
10065 let e: __mmask8 = 0b00000000;
10066 assert_eq!(r, e);
10067 }
10068
10069 #[simd_test(enable = "avx512dq")]
10070 const fn test_kshiftri_mask8() {
10071 let a: __mmask8 = 0b10101001;
10072 let r = _kshiftri_mask8::<3>(a);
10073 let e: __mmask8 = 0b00010101;
10074 assert_eq!(r, e);
10075
10076 let r = _kshiftri_mask8::<7>(a);
10077 let e: __mmask8 = 0b00000001;
10078 assert_eq!(r, e);
10079
10080 let r = _kshiftri_mask8::<8>(a);
10081 let e: __mmask8 = 0b00000000;
10082 assert_eq!(r, e);
10083
10084 let r = _kshiftri_mask8::<9>(a);
10085 let e: __mmask8 = 0b00000000;
10086 assert_eq!(r, e);
10087 }
10088
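// `_ktest_mask8_u8` returns 1 when `a & b` is all zeros and reports through the out
// parameter whether `!a & b` is all zeros. Here `b` is the complement of `a`, so
// a & b == 0 (r == 1) while !a & b == b != 0 (and_not == 0).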
10089 #[simd_test(enable = "avx512dq")]
10090 const fn test_ktest_mask8_u8() {
10091 let a: __mmask8 = 0b01101001;
10092 let b: __mmask8 = 0b10010110;
10093 let mut and_not: u8 = 0;
10094 let r = unsafe { _ktest_mask8_u8(a, b, &mut and_not) };
10095 assert_eq!(r, 1);
10096 assert_eq!(and_not, 0);
10097 }
10098
10099 #[simd_test(enable = "avx512dq")]
10100 const fn test_ktestc_mask8_u8() {
10101 let a: __mmask8 = 0b01101001;
10102 let b: __mmask8 = 0b10010110;
10103 let r = _ktestc_mask8_u8(a, b);
10104 assert_eq!(r, 0);
10105 }
10106
10107 #[simd_test(enable = "avx512dq")]
10108 const fn test_ktestz_mask8_u8() {
10109 let a: __mmask8 = 0b01101001;
10110 let b: __mmask8 = 0b10010110;
10111 let r = _ktestz_mask8_u8(a, b);
10112 assert_eq!(r, 1);
10113 }
10114
10115 #[simd_test(enable = "avx512dq")]
10116 const fn test_ktest_mask16_u8() {
10117 let a: __mmask16 = 0b0110100100111100;
10118 let b: __mmask16 = 0b1001011011000011;
10119 let mut and_not: u8 = 0;
10120 let r = unsafe { _ktest_mask16_u8(a, b, &mut and_not) };
10121 assert_eq!(r, 1);
10122 assert_eq!(and_not, 0);
10123 }
10124
10125 #[simd_test(enable = "avx512dq")]
10126 const fn test_ktestc_mask16_u8() {
10127 let a: __mmask16 = 0b0110100100111100;
10128 let b: __mmask16 = 0b1001011011000011;
10129 let r = _ktestc_mask16_u8(a, b);
10130 assert_eq!(r, 0);
10131 }
10132
10133 #[simd_test(enable = "avx512dq")]
10134 const fn test_ktestz_mask16_u8() {
10135 let a: __mmask16 = 0b0110100100111100;
10136 let b: __mmask16 = 0b1001011011000011;
10137 let r = _ktestz_mask16_u8(a, b);
10138 assert_eq!(r, 1);
10139 }
10140
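// `_load_mask8` / `_store_mask8` round-trip a `__mmask8` through memory, so the value
// read back must equal the original mask.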
10141 #[simd_test(enable = "avx512dq")]
10142 const fn test_load_mask8() {
10143 let a: __mmask8 = 0b01101001;
10144 let r = unsafe { _load_mask8(&a) };
10145 let e: __mmask8 = 0b01101001;
10146 assert_eq!(r, e);
10147 }
10148
10149 #[simd_test(enable = "avx512dq")]
10150 const fn test_store_mask8() {
10151 let a: __mmask8 = 0b01101001;
10152 let mut r: __mmask8 = 0;
10153 unsafe {
10154 _store_mask8(&mut r, a);
10155 }
10156 let e: __mmask8 = 0b01101001;
10157 assert_eq!(r, e);
10158 }
10159
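// `_mm*_movepi32_mask` gathers the sign bit of each 32-bit lane into a mask: for
// `_mm_set_epi32(0, -2, -3, 4)` the negative lanes are 1 and 2, giving 0b0110.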
10160 #[simd_test(enable = "avx512dq,avx512vl")]
10161 const fn test_mm_movepi32_mask() {
10162 let a = _mm_set_epi32(0, -2, -3, 4);
10163 let r = _mm_movepi32_mask(a);
10164 let e = 0b0110;
10165 assert_eq!(r, e);
10166 }
10167
10168 #[simd_test(enable = "avx512dq,avx512vl")]
10169 const fn test_mm256_movepi32_mask() {
10170 let a = _mm256_set_epi32(0, -2, -3, 4, -5, 6, 7, -8);
10171 let r = _mm256_movepi32_mask(a);
10172 let e = 0b01101001;
10173 assert_eq!(r, e);
10174 }
10175
10176 #[simd_test(enable = "avx512dq")]
10177 const fn test_mm512_movepi32_mask() {
10178 let a = _mm512_set_epi32(
10179 0, -2, -3, 4, -5, 6, 7, -8, 9, 10, -11, -12, -13, -14, 15, 16,
10180 );
10181 let r = _mm512_movepi32_mask(a);
10182 let e = 0b0110100100111100;
10183 assert_eq!(r, e);
10184 }
10185
10186 #[simd_test(enable = "avx512dq,avx512vl")]
10187 const fn test_mm_movepi64_mask() {
10188 let a = _mm_set_epi64x(0, -2);
10189 let r = _mm_movepi64_mask(a);
10190 let e = 0b01;
10191 assert_eq!(r, e);
10192 }
10193
10194 #[simd_test(enable = "avx512dq,avx512vl")]
10195 const fn test_mm256_movepi64_mask() {
10196 let a = _mm256_set_epi64x(0, -2, -3, 4);
10197 let r = _mm256_movepi64_mask(a);
10198 let e = 0b0110;
10199 assert_eq!(r, e);
10200 }
10201
10202 #[simd_test(enable = "avx512dq")]
10203 const fn test_mm512_movepi64_mask() {
10204 let a = _mm512_set_epi64(0, -2, -3, 4, -5, 6, 7, -8);
10205 let r = _mm512_movepi64_mask(a);
10206 let e = 0b01101001;
10207 assert_eq!(r, e);
10208 }
10209
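// `_mm*_movm_epi32` / `_movm_epi64` go the other way: every set mask bit expands to an
// all-ones (-1) lane and every clear bit to an all-zero lane.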
10210 #[simd_test(enable = "avx512dq,avx512vl")]
10211 const fn test_mm_movm_epi32() {
10212 let a = 0b0110;
10213 let r = _mm_movm_epi32(a);
10214 let e = _mm_set_epi32(0, -1, -1, 0);
10215 assert_eq_m128i(r, e);
10216 }
10217
10218 #[simd_test(enable = "avx512dq,avx512vl")]
10219 const fn test_mm256_movm_epi32() {
10220 let a = 0b01101001;
10221 let r = _mm256_movm_epi32(a);
10222 let e = _mm256_set_epi32(0, -1, -1, 0, -1, 0, 0, -1);
10223 assert_eq_m256i(r, e);
10224 }
10225
10226 #[simd_test(enable = "avx512dq")]
10227 const fn test_mm512_movm_epi32() {
10228 let a = 0b0110100100111100;
10229 let r = _mm512_movm_epi32(a);
10230 let e = _mm512_set_epi32(0, -1, -1, 0, -1, 0, 0, -1, 0, 0, -1, -1, -1, -1, 0, 0);
10231 assert_eq_m512i(r, e);
10232 }
10233
10234 #[simd_test(enable = "avx512dq,avx512vl")]
10235 const fn test_mm_movm_epi64() {
10236 let a = 0b01;
10237 let r = _mm_movm_epi64(a);
10238 let e = _mm_set_epi64x(0, -1);
10239 assert_eq_m128i(r, e);
10240 }
10241
10242 #[simd_test(enable = "avx512dq,avx512vl")]
10243 const fn test_mm256_movm_epi64() {
10244 let a = 0b0110;
10245 let r = _mm256_movm_epi64(a);
10246 let e = _mm256_set_epi64x(0, -1, -1, 0);
10247 assert_eq_m256i(r, e);
10248 }
10249
10250 #[simd_test(enable = "avx512dq")]
10251 const fn test_mm512_movm_epi64() {
10252 let a = 0b01101001;
10253 let r = _mm512_movm_epi64(a);
10254 let e = _mm512_set_epi64(0, -1, -1, 0, -1, 0, 0, -1);
10255 assert_eq_m512i(r, e);
10256 }
10257
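// The range tests all use IMM8 = 0b0101: per Intel's encoding, bits 1:0 = 01 select
// the maximum of each lane pair and bits 3:2 = 01 take the sign from the comparison
// result, so with these positive inputs each output lane is simply max(a, b). Masking
// works per lane as in the conversion tests above.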
10258 #[simd_test(enable = "avx512dq")]
10259 fn test_mm512_range_round_pd() {
10260 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
10261 let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
10262 let r = _mm512_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(a, b);
10263 let e = _mm512_set_pd(2., 2., 4., 4., 6., 6., 8., 8.);
10264 assert_eq_m512d(r, e);
10265 }
10266
10267 #[simd_test(enable = "avx512dq")]
10268 fn test_mm512_mask_range_round_pd() {
10269 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
10270 let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
10271 let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
10272 let r = _mm512_mask_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(c, 0b01101001, a, b);
10273 let e = _mm512_set_pd(9., 2., 4., 12., 6., 14., 15., 8.);
10274 assert_eq_m512d(r, e);
10275 }
10276
10277 #[simd_test(enable = "avx512dq")]
10278 fn test_mm512_maskz_range_round_pd() {
10279 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
10280 let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
10281 let r = _mm512_maskz_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(0b01101001, a, b);
10282 let e = _mm512_set_pd(0., 2., 4., 0., 6., 0., 0., 8.);
10283 assert_eq_m512d(r, e);
10284 }
10285
10286 #[simd_test(enable = "avx512dq,avx512vl")]
10287 fn test_mm_range_pd() {
10288 let a = _mm_set_pd(1., 2.);
10289 let b = _mm_set_pd(2., 1.);
10290 let r = _mm_range_pd::<0b0101>(a, b);
10291 let e = _mm_set_pd(2., 2.);
10292 assert_eq_m128d(r, e);
10293 }
10294
10295 #[simd_test(enable = "avx512dq,avx512vl")]
10296 fn test_mm_mask_range_pd() {
10297 let a = _mm_set_pd(1., 2.);
10298 let b = _mm_set_pd(2., 1.);
10299 let c = _mm_set_pd(3., 4.);
10300 let r = _mm_mask_range_pd::<0b0101>(c, 0b01, a, b);
10301 let e = _mm_set_pd(3., 2.);
10302 assert_eq_m128d(r, e);
10303 }
10304
10305 #[simd_test(enable = "avx512dq,avx512vl")]
10306 fn test_mm_maskz_range_pd() {
10307 let a = _mm_set_pd(1., 2.);
10308 let b = _mm_set_pd(2., 1.);
10309 let r = _mm_maskz_range_pd::<0b0101>(0b01, a, b);
10310 let e = _mm_set_pd(0., 2.);
10311 assert_eq_m128d(r, e);
10312 }
10313
10314 #[simd_test(enable = "avx512dq,avx512vl")]
10315 fn test_mm256_range_pd() {
10316 let a = _mm256_set_pd(1., 2., 3., 4.);
10317 let b = _mm256_set_pd(2., 1., 4., 3.);
10318 let r = _mm256_range_pd::<0b0101>(a, b);
10319 let e = _mm256_set_pd(2., 2., 4., 4.);
10320 assert_eq_m256d(r, e);
10321 }
10322
10323 #[simd_test(enable = "avx512dq,avx512vl")]
10324 fn test_mm256_mask_range_pd() {
10325 let a = _mm256_set_pd(1., 2., 3., 4.);
10326 let b = _mm256_set_pd(2., 1., 4., 3.);
10327 let c = _mm256_set_pd(5., 6., 7., 8.);
10328 let r = _mm256_mask_range_pd::<0b0101>(c, 0b0110, a, b);
10329 let e = _mm256_set_pd(5., 2., 4., 8.);
10330 assert_eq_m256d(r, e);
10331 }
10332
10333 #[simd_test(enable = "avx512dq,avx512vl")]
10334 fn test_mm256_maskz_range_pd() {
10335 let a = _mm256_set_pd(1., 2., 3., 4.);
10336 let b = _mm256_set_pd(2., 1., 4., 3.);
10337 let r = _mm256_maskz_range_pd::<0b0101>(0b0110, a, b);
10338 let e = _mm256_set_pd(0., 2., 4., 0.);
10339 assert_eq_m256d(r, e);
10340 }
10341
10342 #[simd_test(enable = "avx512dq")]
10343 fn test_mm512_range_pd() {
10344 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
10345 let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
10346 let r = _mm512_range_pd::<0b0101>(a, b);
10347 let e = _mm512_set_pd(2., 2., 4., 4., 6., 6., 8., 8.);
10348 assert_eq_m512d(r, e);
10349 }
10350
10351 #[simd_test(enable = "avx512dq")]
10352 fn test_mm512_mask_range_pd() {
10353 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
10354 let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
10355 let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
10356 let r = _mm512_mask_range_pd::<0b0101>(c, 0b01101001, a, b);
10357 let e = _mm512_set_pd(9., 2., 4., 12., 6., 14., 15., 8.);
10358 assert_eq_m512d(r, e);
10359 }
10360
10361 #[simd_test(enable = "avx512dq")]
10362 fn test_mm512_maskz_range_pd() {
10363 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
10364 let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
10365 let r = _mm512_maskz_range_pd::<0b0101>(0b01101001, a, b);
10366 let e = _mm512_set_pd(0., 2., 4., 0., 6., 0., 0., 8.);
10367 assert_eq_m512d(r, e);
10368 }
10369
10370 #[simd_test(enable = "avx512dq")]
10371 fn test_mm512_range_round_ps() {
10372 let a = _mm512_set_ps(
10373 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
10374 );
10375 let b = _mm512_set_ps(
10376 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
10377 );
10378 let r = _mm512_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(a, b);
10379 let e = _mm512_set_ps(
10380 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
10381 );
10382 assert_eq_m512(r, e);
10383 }
10384
10385 #[simd_test(enable = "avx512dq")]
10386 fn test_mm512_mask_range_round_ps() {
10387 let a = _mm512_set_ps(
10388 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
10389 );
10390 let b = _mm512_set_ps(
10391 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
10392 );
10393 let c = _mm512_set_ps(
10394 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
10395 );
10396 let r =
10397 _mm512_mask_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0110100100111100, a, b);
10398 let e = _mm512_set_ps(
10399 17., 2., 4., 20., 6., 22., 23., 8., 25., 26., 12., 12., 14., 14., 31., 32.,
10400 );
10401 assert_eq_m512(r, e);
10402 }
10403
10404 #[simd_test(enable = "avx512dq")]
10405 fn test_mm512_maskz_range_round_ps() {
10406 let a = _mm512_set_ps(
10407 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
10408 );
10409 let b = _mm512_set_ps(
10410 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
10411 );
10412 let r = _mm512_maskz_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(0b0110100100111100, a, b);
10413 let e = _mm512_set_ps(
10414 0., 2., 4., 0., 6., 0., 0., 8., 0., 0., 12., 12., 14., 14., 0., 0.,
10415 );
10416 assert_eq_m512(r, e);
10417 }
10418
10419 #[simd_test(enable = "avx512dq,avx512vl")]
10420 fn test_mm_range_ps() {
10421 let a = _mm_set_ps(1., 2., 3., 4.);
10422 let b = _mm_set_ps(2., 1., 4., 3.);
10423 let r = _mm_range_ps::<0b0101>(a, b);
10424 let e = _mm_set_ps(2., 2., 4., 4.);
10425 assert_eq_m128(r, e);
10426 }
10427
10428 #[simd_test(enable = "avx512dq,avx512vl")]
10429 fn test_mm_mask_range_ps() {
10430 let a = _mm_set_ps(1., 2., 3., 4.);
10431 let b = _mm_set_ps(2., 1., 4., 3.);
10432 let c = _mm_set_ps(5., 6., 7., 8.);
10433 let r = _mm_mask_range_ps::<0b0101>(c, 0b0110, a, b);
10434 let e = _mm_set_ps(5., 2., 4., 8.);
10435 assert_eq_m128(r, e);
10436 }
10437
10438 #[simd_test(enable = "avx512dq,avx512vl")]
10439 fn test_mm_maskz_range_ps() {
10440 let a = _mm_set_ps(1., 2., 3., 4.);
10441 let b = _mm_set_ps(2., 1., 4., 3.);
10442 let r = _mm_maskz_range_ps::<0b0101>(0b0110, a, b);
10443 let e = _mm_set_ps(0., 2., 4., 0.);
10444 assert_eq_m128(r, e);
10445 }
10446
10447 #[simd_test(enable = "avx512dq,avx512vl")]
10448 fn test_mm256_range_ps() {
10449 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
10450 let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
10451 let r = _mm256_range_ps::<0b0101>(a, b);
10452 let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
10453 assert_eq_m256(r, e);
10454 }
10455
10456 #[simd_test(enable = "avx512dq,avx512vl")]
10457 fn test_mm256_mask_range_ps() {
10458 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
10459 let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
10460 let c = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
10461 let r = _mm256_mask_range_ps::<0b0101>(c, 0b01101001, a, b);
10462 let e = _mm256_set_ps(9., 2., 4., 12., 6., 14., 15., 8.);
10463 assert_eq_m256(r, e);
10464 }
10465
10466 #[simd_test(enable = "avx512dq,avx512vl")]
10467 fn test_mm256_maskz_range_ps() {
10468 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
10469 let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
10470 let r = _mm256_maskz_range_ps::<0b0101>(0b01101001, a, b);
10471 let e = _mm256_set_ps(0., 2., 4., 0., 6., 0., 0., 8.);
10472 assert_eq_m256(r, e);
10473 }
10474
10475 #[simd_test(enable = "avx512dq")]
10476 fn test_mm512_range_ps() {
10477 let a = _mm512_set_ps(
10478 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
10479 );
10480 let b = _mm512_set_ps(
10481 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
10482 );
10483 let r = _mm512_range_ps::<0b0101>(a, b);
10484 let e = _mm512_set_ps(
10485 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
10486 );
10487 assert_eq_m512(r, e);
10488 }
10489
10490 #[simd_test(enable = "avx512dq")]
10491 fn test_mm512_mask_range_ps() {
10492 let a = _mm512_set_ps(
10493 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
10494 );
10495 let b = _mm512_set_ps(
10496 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
10497 );
10498 let c = _mm512_set_ps(
10499 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
10500 );
10501 let r = _mm512_mask_range_ps::<0b0101>(c, 0b0110100100111100, a, b);
10502 let e = _mm512_set_ps(
10503 17., 2., 4., 20., 6., 22., 23., 8., 25., 26., 12., 12., 14., 14., 31., 32.,
10504 );
10505 assert_eq_m512(r, e);
10506 }
10507
10508 #[simd_test(enable = "avx512dq")]
10509 fn test_mm512_maskz_range_ps() {
10510 let a = _mm512_set_ps(
10511 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
10512 );
10513 let b = _mm512_set_ps(
10514 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
10515 );
10516 let r = _mm512_maskz_range_ps::<0b0101>(0b0110100100111100, a, b);
10517 let e = _mm512_set_ps(
10518 0., 2., 4., 0., 6., 0., 0., 8., 0., 0., 12., 12., 14., 14., 0., 0.,
10519 );
10520 assert_eq_m512(r, e);
10521 }
10522
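// The `_sd` / `_ss` range variants operate on the lowest lane only and copy the upper
// lanes from `a`; with mask bit 0 clear, the masked forms take the low lane from `c`
// (or zero it in the `maskz` forms), which is what the expected values below encode.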
10523 #[simd_test(enable = "avx512dq")]
10524 fn test_mm_range_round_sd() {
10525 let a = _mm_set_sd(1.);
10526 let b = _mm_set_sd(2.);
10527 let r = _mm_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(a, b);
10528 let e = _mm_set_sd(2.);
10529 assert_eq_m128d(r, e);
10530 }
10531
10532 #[simd_test(enable = "avx512dq")]
10533 fn test_mm_mask_range_round_sd() {
10534 let a = _mm_set_sd(1.);
10535 let b = _mm_set_sd(2.);
10536 let c = _mm_set_sd(3.);
10537 let r = _mm_mask_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0, a, b);
10538 let e = _mm_set_sd(3.);
10539 assert_eq_m128d(r, e);
10540 }
10541
10542 #[simd_test(enable = "avx512dq")]
10543 fn test_mm_maskz_range_round_sd() {
10544 let a = _mm_set_sd(1.);
10545 let b = _mm_set_sd(2.);
10546 let r = _mm_maskz_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b);
10547 let e = _mm_set_sd(0.);
10548 assert_eq_m128d(r, e);
10549 }
10550
10551 #[simd_test(enable = "avx512dq")]
10552 fn test_mm_mask_range_sd() {
10553 let a = _mm_set_sd(1.);
10554 let b = _mm_set_sd(2.);
10555 let c = _mm_set_sd(3.);
10556 let r = _mm_mask_range_sd::<0b0101>(c, 0b0, a, b);
10557 let e = _mm_set_sd(3.);
10558 assert_eq_m128d(r, e);
10559 }
10560
10561 #[simd_test(enable = "avx512dq")]
10562 fn test_mm_maskz_range_sd() {
10563 let a = _mm_set_sd(1.);
10564 let b = _mm_set_sd(2.);
10565 let r = _mm_maskz_range_sd::<0b0101>(0b0, a, b);
10566 let e = _mm_set_sd(0.);
10567 assert_eq_m128d(r, e);
10568 }
10569
10570 #[simd_test(enable = "avx512dq")]
10571 fn test_mm_range_round_ss() {
10572 let a = _mm_set_ss(1.);
10573 let b = _mm_set_ss(2.);
10574 let r = _mm_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(a, b);
10575 let e = _mm_set_ss(2.);
10576 assert_eq_m128(r, e);
10577 }
10578
10579 #[simd_test(enable = "avx512dq")]
10580 fn test_mm_mask_range_round_ss() {
10581 let a = _mm_set_ss(1.);
10582 let b = _mm_set_ss(2.);
10583 let c = _mm_set_ss(3.);
10584 let r = _mm_mask_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0, a, b);
10585 let e = _mm_set_ss(3.);
10586 assert_eq_m128(r, e);
10587 }
10588
10589 #[simd_test(enable = "avx512dq")]
10590 fn test_mm_maskz_range_round_ss() {
10591 let a = _mm_set_ss(1.);
10592 let b = _mm_set_ss(2.);
10593 let r = _mm_maskz_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b);
10594 let e = _mm_set_ss(0.);
10595 assert_eq_m128(r, e);
10596 }
10597
10598 #[simd_test(enable = "avx512dq")]
10599 fn test_mm_mask_range_ss() {
10600 let a = _mm_set_ss(1.);
10601 let b = _mm_set_ss(2.);
10602 let c = _mm_set_ss(3.);
10603 let r = _mm_mask_range_ss::<0b0101>(c, 0b0, a, b);
10604 let e = _mm_set_ss(3.);
10605 assert_eq_m128(r, e);
10606 }
10607
10608 #[simd_test(enable = "avx512dq")]
10609 fn test_mm_maskz_range_ss() {
10610 let a = _mm_set_ss(1.);
10611 let b = _mm_set_ss(2.);
10612 let r = _mm_maskz_range_ss::<0b0101>(0b0, a, b);
10613 let e = _mm_set_ss(0.);
10614 assert_eq_m128(r, e);
10615 }
10616
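// For the reduce tests, IMM8 = 16 | _MM_FROUND_TO_ZERO keeps one fraction bit
// (the upper four bits give the precision: 16 >> 4 == 1, i.e. a granularity of 0.5)
// and truncates toward zero; the result is the remainder a - round(a), e.g.
// 0.75 -> 0.75 - 0.5 == 0.25 and 1.0 -> 0.0.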
10617 #[simd_test(enable = "avx512dq")]
10618 fn test_mm512_reduce_round_pd() {
10619 let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10620 let r = _mm512_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
10621 let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
10622 assert_eq_m512d(r, e);
10623 }
10624
10625 #[simd_test(enable = "avx512dq")]
10626 fn test_mm512_mask_reduce_round_pd() {
10627 let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10628 let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
10629 let r = _mm512_mask_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10630 src, 0b01101001, a,
10631 );
10632 let e = _mm512_set_pd(3., 0., 0.25, 6., 0.25, 8., 9., 0.);
10633 assert_eq_m512d(r, e);
10634 }
10635
10636 #[simd_test(enable = "avx512dq")]
10637 fn test_mm512_maskz_reduce_round_pd() {
10638 let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10639 let r = _mm512_maskz_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10640 0b01101001, a,
10641 );
10642 let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.);
10643 assert_eq_m512d(r, e);
10644 }
10645
10646 #[simd_test(enable = "avx512dq,avx512vl")]
10647 fn test_mm_reduce_pd() {
10648 let a = _mm_set_pd(0.25, 0.50);
10649 let r = _mm_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
10650 let e = _mm_set_pd(0.25, 0.);
10651 assert_eq_m128d(r, e);
10652 }
10653
10654 #[simd_test(enable = "avx512dq,avx512vl")]
10655 fn test_mm_mask_reduce_pd() {
10656 let a = _mm_set_pd(0.25, 0.50);
10657 let src = _mm_set_pd(3., 4.);
10658 let r = _mm_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01, a);
10659 let e = _mm_set_pd(3., 0.);
10660 assert_eq_m128d(r, e);
10661 }
10662
10663 #[simd_test(enable = "avx512dq,avx512vl")]
10664 fn test_mm_maskz_reduce_pd() {
10665 let a = _mm_set_pd(0.25, 0.50);
10666 let r = _mm_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01, a);
10667 let e = _mm_set_pd(0., 0.);
10668 assert_eq_m128d(r, e);
10669 }
10670
10671 #[simd_test(enable = "avx512dq,avx512vl")]
10672 fn test_mm256_reduce_pd() {
10673 let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
10674 let r = _mm256_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
10675 let e = _mm256_set_pd(0.25, 0., 0.25, 0.);
10676 assert_eq_m256d(r, e);
10677 }
10678
10679 #[simd_test(enable = "avx512dq,avx512vl")]
10680 fn test_mm256_mask_reduce_pd() {
10681 let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
10682 let src = _mm256_set_pd(3., 4., 5., 6.);
10683 let r = _mm256_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a);
10684 let e = _mm256_set_pd(3., 0., 0.25, 6.);
10685 assert_eq_m256d(r, e);
10686 }
10687
10688 #[simd_test(enable = "avx512dq,avx512vl")]
10689 fn test_mm256_maskz_reduce_pd() {
10690 let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
10691 let r = _mm256_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a);
10692 let e = _mm256_set_pd(0., 0., 0.25, 0.);
10693 assert_eq_m256d(r, e);
10694 }
10695
10696 #[simd_test(enable = "avx512dq")]
10697 fn test_mm512_reduce_pd() {
10698 let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10699 let r = _mm512_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
10700 let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
10701 assert_eq_m512d(r, e);
10702 }
10703
10704 #[simd_test(enable = "avx512dq")]
10705 fn test_mm512_mask_reduce_pd() {
10706 let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10707 let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
10708 let r = _mm512_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a);
10709 let e = _mm512_set_pd(3., 0., 0.25, 6., 0.25, 8., 9., 0.);
10710 assert_eq_m512d(r, e);
10711 }
10712
10713 #[simd_test(enable = "avx512dq")]
10714 fn test_mm512_maskz_reduce_pd() {
10715 let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10716 let r = _mm512_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a);
10717 let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.);
10718 assert_eq_m512d(r, e);
10719 }
10720
10721 #[simd_test(enable = "avx512dq")]
10722 fn test_mm512_reduce_round_ps() {
10723 let a = _mm512_set_ps(
10724 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
10725 4.0,
10726 );
10727 let r = _mm512_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
10728 let e = _mm512_set_ps(
10729 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.,
10730 );
10731 assert_eq_m512(r, e);
10732 }
10733
10734 #[simd_test(enable = "avx512dq")]
10735 fn test_mm512_mask_reduce_round_ps() {
10736 let a = _mm512_set_ps(
10737 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
10738 4.0,
10739 );
10740 let src = _mm512_set_ps(
10741 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.,
10742 );
10743 let r = _mm512_mask_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10744 src,
10745 0b0110100100111100,
10746 a,
10747 );
10748 let e = _mm512_set_ps(
10749 5., 0., 0.25, 8., 0.25, 10., 11., 0., 13., 14., 0.25, 0., 0.25, 0., 19., 20.,
10750 );
10751 assert_eq_m512(r, e);
10752 }
10753
10754 #[simd_test(enable = "avx512dq")]
10755 fn test_mm512_maskz_reduce_round_ps() {
10756 let a = _mm512_set_ps(
10757 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
10758 4.0,
10759 );
10760 let r = _mm512_maskz_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10761 0b0110100100111100,
10762 a,
10763 );
10764 let e = _mm512_set_ps(
10765 0., 0., 0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.25, 0., 0.25, 0., 0., 0.,
10766 );
10767 assert_eq_m512(r, e);
10768 }
10769
10770 #[simd_test(enable = "avx512dq,avx512vl")]
10771 fn test_mm_reduce_ps() {
10772 let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
10773 let r = _mm_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
10774 let e = _mm_set_ps(0.25, 0., 0.25, 0.);
10775 assert_eq_m128(r, e);
10776 }
10777
10778 #[simd_test(enable = "avx512dq,avx512vl")]
10779 fn test_mm_mask_reduce_ps() {
10780 let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
10781 let src = _mm_set_ps(2., 3., 4., 5.);
10782 let r = _mm_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a);
10783 let e = _mm_set_ps(2., 0., 0.25, 5.);
10784 assert_eq_m128(r, e);
10785 }
10786
10787 #[simd_test(enable = "avx512dq,avx512vl")]
10788 fn test_mm_maskz_reduce_ps() {
10789 let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
10790 let r = _mm_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a);
10791 let e = _mm_set_ps(0., 0., 0.25, 0.);
10792 assert_eq_m128(r, e);
10793 }
10794
10795 #[simd_test(enable = "avx512dq,avx512vl")]
10796 fn test_mm256_reduce_ps() {
10797 let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10798 let r = _mm256_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
10799 let e = _mm256_set_ps(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
10800 assert_eq_m256(r, e);
10801 }
10802
10803 #[simd_test(enable = "avx512dq,avx512vl")]
10804 fn test_mm256_mask_reduce_ps() {
10805 let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10806 let src = _mm256_set_ps(3., 4., 5., 6., 7., 8., 9., 10.);
10807 let r = _mm256_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a);
10808 let e = _mm256_set_ps(3., 0., 0.25, 6., 0.25, 8., 9., 0.);
10809 assert_eq_m256(r, e);
10810 }
10811
10812 #[simd_test(enable = "avx512dq,avx512vl")]
10813 fn test_mm256_maskz_reduce_ps() {
10814 let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10815 let r = _mm256_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a);
10816 let e = _mm256_set_ps(0., 0., 0.25, 0., 0.25, 0., 0., 0.);
10817 assert_eq_m256(r, e);
10818 }
10819
10820 #[simd_test(enable = "avx512dq")]
10821 fn test_mm512_reduce_ps() {
10822 let a = _mm512_set_ps(
10823 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
10824 4.0,
10825 );
10826 let r = _mm512_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
10827 let e = _mm512_set_ps(
10828 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.,
10829 );
10830 assert_eq_m512(r, e);
10831 }
10832
10833 #[simd_test(enable = "avx512dq")]
10834 fn test_mm512_mask_reduce_ps() {
10835 let a = _mm512_set_ps(
10836 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
10837 4.0,
10838 );
10839 let src = _mm512_set_ps(
10840 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.,
10841 );
10842 let r = _mm512_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110100100111100, a);
10843 let e = _mm512_set_ps(
10844 5., 0., 0.25, 8., 0.25, 10., 11., 0., 13., 14., 0.25, 0., 0.25, 0., 19., 20.,
10845 );
10846 assert_eq_m512(r, e);
10847 }
10848
10849 #[simd_test(enable = "avx512dq")]
10850 fn test_mm512_maskz_reduce_ps() {
10851 let a = _mm512_set_ps(
10852 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
10853 4.0,
10854 );
10855 let r = _mm512_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110100100111100, a);
10856 let e = _mm512_set_ps(
10857 0., 0., 0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.25, 0., 0.25, 0., 0., 0.,
10858 );
10859 assert_eq_m512(r, e);
10860 }
10861
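// The scalar reduce variants take the upper lane(s) from `a` and reduce only the low
// lane of `b`: 0.25 already lies below the 0.5 granularity, so it is returned
// unchanged (or replaced from `c` / zeroed when mask bit 0 is clear).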
10862 #[simd_test(enable = "avx512dq")]
10863 fn test_mm_reduce_round_sd() {
10864 let a = _mm_set_pd(1., 2.);
10865 let b = _mm_set_sd(0.25);
10866 let r = _mm_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
10867 let e = _mm_set_pd(1., 0.25);
10868 assert_eq_m128d(r, e);
10869 }
10870
10871 #[simd_test(enable = "avx512dq")]
10872 fn test_mm_mask_reduce_round_sd() {
10873 let a = _mm_set_pd(1., 2.);
10874 let b = _mm_set_sd(0.25);
10875 let c = _mm_set_pd(3., 4.);
10876 let r = _mm_mask_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10877 c, 0b0, a, b,
10878 );
10879 let e = _mm_set_pd(1., 4.);
10880 assert_eq_m128d(r, e);
10881 }
10882
10883 #[simd_test(enable = "avx512dq")]
10884 fn test_mm_maskz_reduce_round_sd() {
10885 let a = _mm_set_pd(1., 2.);
10886 let b = _mm_set_sd(0.25);
10887 let r =
10888 _mm_maskz_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0b0, a, b);
10889 let e = _mm_set_pd(1., 0.);
10890 assert_eq_m128d(r, e);
10891 }
10892
10893 #[simd_test(enable = "avx512dq")]
10894 fn test_mm_reduce_sd() {
10895 let a = _mm_set_pd(1., 2.);
10896 let b = _mm_set_sd(0.25);
10897 let r = _mm_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
10898 let e = _mm_set_pd(1., 0.25);
10899 assert_eq_m128d(r, e);
10900 }
10901
10902 #[simd_test(enable = "avx512dq")]
10903 fn test_mm_mask_reduce_sd() {
10904 let a = _mm_set_pd(1., 2.);
10905 let b = _mm_set_sd(0.25);
10906 let c = _mm_set_pd(3., 4.);
10907 let r = _mm_mask_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(c, 0b0, a, b);
10908 let e = _mm_set_pd(1., 4.);
10909 assert_eq_m128d(r, e);
10910 }
10911
10912 #[simd_test(enable = "avx512dq")]
10913 fn test_mm_maskz_reduce_sd() {
10914 let a = _mm_set_pd(1., 2.);
10915 let b = _mm_set_sd(0.25);
10916 let r = _mm_maskz_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b);
10917 let e = _mm_set_pd(1., 0.);
10918 assert_eq_m128d(r, e);
10919 }
10920
10921 #[simd_test(enable = "avx512dq")]
10922 fn test_mm_reduce_round_ss() {
10923 let a = _mm_set_ps(1., 2., 3., 4.);
10924 let b = _mm_set_ss(0.25);
10925 let r = _mm_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
10926 let e = _mm_set_ps(1., 2., 3., 0.25);
10927 assert_eq_m128(r, e);
10928 }
10929
10930 #[simd_test(enable = "avx512dq")]
10931 fn test_mm_mask_reduce_round_ss() {
10932 let a = _mm_set_ps(1., 2., 3., 4.);
10933 let b = _mm_set_ss(0.25);
10934 let c = _mm_set_ps(5., 6., 7., 8.);
10935 let r = _mm_mask_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10936 c, 0b0, a, b,
10937 );
10938 let e = _mm_set_ps(1., 2., 3., 8.);
10939 assert_eq_m128(r, e);
10940 }
10941
10942 #[simd_test(enable = "avx512dq")]
10943 fn test_mm_maskz_reduce_round_ss() {
10944 let a = _mm_set_ps(1., 2., 3., 4.);
10945 let b = _mm_set_ss(0.25);
10946 let r =
10947 _mm_maskz_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0b0, a, b);
10948 let e = _mm_set_ps(1., 2., 3., 0.);
10949 assert_eq_m128(r, e);
10950 }
10951
10952 #[simd_test(enable = "avx512dq")]
10953 fn test_mm_reduce_ss() {
10954 let a = _mm_set_ps(1., 2., 3., 4.);
10955 let b = _mm_set_ss(0.25);
10956 let r = _mm_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
10957 let e = _mm_set_ps(1., 2., 3., 0.25);
10958 assert_eq_m128(r, e);
10959 }
10960
10961 #[simd_test(enable = "avx512dq")]
10962 fn test_mm_mask_reduce_ss() {
10963 let a = _mm_set_ps(1., 2., 3., 4.);
10964 let b = _mm_set_ss(0.25);
10965 let c = _mm_set_ps(5., 6., 7., 8.);
10966 let r = _mm_mask_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(c, 0b0, a, b);
10967 let e = _mm_set_ps(1., 2., 3., 8.);
10968 assert_eq_m128(r, e);
10969 }
10970
10971 #[simd_test(enable = "avx512dq")]
10972 fn test_mm_maskz_reduce_ss() {
10973 let a = _mm_set_ps(1., 2., 3., 4.);
10974 let b = _mm_set_ss(0.25);
10975 let r = _mm_maskz_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b);
10976 let e = _mm_set_ps(1., 2., 3., 0.);
10977 assert_eq_m128(r, e);
10978 }
10979
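// The fpclass tests all use IMM8 = 0x18, which per Intel's category encoding checks
// for positive infinity (bit 3) and negative infinity (bit 4); only lanes holding an
// infinity set their mask bit, and the `mask_` forms additionally AND the result
// with `k`.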
10980 #[simd_test(enable = "avx512dq,avx512vl")]
10981 fn test_mm_fpclass_pd_mask() {
10982 let a = _mm_set_pd(1., f64::INFINITY);
10983 let r = _mm_fpclass_pd_mask::<0x18>(a);
10984 let e = 0b01;
10985 assert_eq!(r, e);
10986 }
10987
10988 #[simd_test(enable = "avx512dq,avx512vl")]
10989 fn test_mm_mask_fpclass_pd_mask() {
10990 let a = _mm_set_pd(1., f64::INFINITY);
10991 let r = _mm_mask_fpclass_pd_mask::<0x18>(0b10, a);
10992 let e = 0b00;
10993 assert_eq!(r, e);
10994 }
10995
10996 #[simd_test(enable = "avx512dq,avx512vl")]
10997 fn test_mm256_fpclass_pd_mask() {
10998 let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0);
10999 let r = _mm256_fpclass_pd_mask::<0x18>(a);
11000 let e = 0b0110;
11001 assert_eq!(r, e);
11002 }
11003
11004 #[simd_test(enable = "avx512dq,avx512vl")]
11005 fn test_mm256_mask_fpclass_pd_mask() {
11006 let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0);
11007 let r = _mm256_mask_fpclass_pd_mask::<0x18>(0b1010, a);
11008 let e = 0b0010;
11009 assert_eq!(r, e);
11010 }
11011
11012 #[simd_test(enable = "avx512dq")]
11013 fn test_mm512_fpclass_pd_mask() {
11014 let a = _mm512_set_pd(
11015 1.,
11016 f64::INFINITY,
11017 f64::NEG_INFINITY,
11018 0.0,
11019 -0.0,
11020 -2.0,
11021 f64::NAN,
11022 1.0e-308,
11023 );
11024 let r = _mm512_fpclass_pd_mask::<0x18>(a);
11025 let e = 0b01100000;
11026 assert_eq!(r, e);
11027 }
11028
11029 #[simd_test(enable = "avx512dq")]
11030 fn test_mm512_mask_fpclass_pd_mask() {
11031 let a = _mm512_set_pd(
11032 1.,
11033 f64::INFINITY,
11034 f64::NEG_INFINITY,
11035 0.0,
11036 -0.0,
11037 -2.0,
11038 f64::NAN,
11039 1.0e-308,
11040 );
11041 let r = _mm512_mask_fpclass_pd_mask::<0x18>(0b10101010, a);
11042 let e = 0b00100000;
11043 assert_eq!(r, e);
11044 }
11045
11046 #[simd_test(enable = "avx512dq,avx512vl")]
11047 fn test_mm_fpclass_ps_mask() {
11048 let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0);
11049 let r = _mm_fpclass_ps_mask::<0x18>(a);
11050 let e = 0b0110;
11051 assert_eq!(r, e);
11052 }
11053
11054 #[simd_test(enable = "avx512dq,avx512vl")]
11055 fn test_mm_mask_fpclass_ps_mask() {
11056 let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0);
11057 let r = _mm_mask_fpclass_ps_mask::<0x18>(0b1010, a);
11058 let e = 0b0010;
11059 assert_eq!(r, e);
11060 }
11061
11062 #[simd_test(enable = "avx512dq,avx512vl")]
11063 fn test_mm256_fpclass_ps_mask() {
11064 let a = _mm256_set_ps(
11065 1.,
11066 f32::INFINITY,
11067 f32::NEG_INFINITY,
11068 0.0,
11069 -0.0,
11070 -2.0,
11071 f32::NAN,
11072 1.0e-38,
11073 );
11074 let r = _mm256_fpclass_ps_mask::<0x18>(a);
11075 let e = 0b01100000;
11076 assert_eq!(r, e);
11077 }
11078
11079 #[simd_test(enable = "avx512dq,avx512vl")]
11080 fn test_mm256_mask_fpclass_ps_mask() {
11081 let a = _mm256_set_ps(
11082 1.,
11083 f32::INFINITY,
11084 f32::NEG_INFINITY,
11085 0.0,
11086 -0.0,
11087 -2.0,
11088 f32::NAN,
11089 1.0e-38,
11090 );
11091 let r = _mm256_mask_fpclass_ps_mask::<0x18>(0b10101010, a);
11092 let e = 0b00100000;
11093 assert_eq!(r, e);
11094 }
11095
11096 #[simd_test(enable = "avx512dq")]
11097 fn test_mm512_fpclass_ps_mask() {
11098 let a = _mm512_set_ps(
11099 1.,
11100 f32::INFINITY,
11101 f32::NEG_INFINITY,
11102 0.0,
11103 -0.0,
11104 -2.0,
11105 f32::NAN,
11106 1.0e-38,
11107 -1.,
11108 f32::NEG_INFINITY,
11109 f32::INFINITY,
11110 -0.0,
11111 0.0,
11112 2.0,
11113 f32::NAN,
11114 -1.0e-38,
11115 );
11116 let r = _mm512_fpclass_ps_mask::<0x18>(a);
11117 let e = 0b0110000001100000;
11118 assert_eq!(r, e);
11119 }
11120
11121 #[simd_test(enable = "avx512dq")]
11122 fn test_mm512_mask_fpclass_ps_mask() {
11123 let a = _mm512_set_ps(
11124 1.,
11125 f32::INFINITY,
11126 f32::NEG_INFINITY,
11127 0.0,
11128 -0.0,
11129 -2.0,
11130 f32::NAN,
11131 1.0e-38,
11132 -1.,
11133 f32::NEG_INFINITY,
11134 f32::INFINITY,
11135 -0.0,
11136 0.0,
11137 2.0,
11138 f32::NAN,
11139 -1.0e-38,
11140 );
11141 let r = _mm512_mask_fpclass_ps_mask::<0x18>(0b1010101010101010, a);
11142 let e = 0b0010000000100000;
11143 assert_eq!(r, e);
11144 }
11145
11146 #[simd_test(enable = "avx512dq")]
11147 fn test_mm_fpclass_sd_mask() {
11148 let a = _mm_set_pd(1., f64::INFINITY);
11149 let r = _mm_fpclass_sd_mask::<0x18>(a);
11150 let e = 0b1;
11151 assert_eq!(r, e);
11152 }
11153
11154 #[simd_test(enable = "avx512dq")]
11155 fn test_mm_mask_fpclass_sd_mask() {
11156 let a = _mm_set_sd(f64::INFINITY);
11157 let r = _mm_mask_fpclass_sd_mask::<0x18>(0b0, a);
11158 let e = 0b0;
11159 assert_eq!(r, e);
11160 }
11161
11162 #[simd_test(enable = "avx512dq")]
11163 fn test_mm_fpclass_ss_mask() {
11164 let a = _mm_set_ss(f32::INFINITY);
11165 let r = _mm_fpclass_ss_mask::<0x18>(a);
11166 let e = 0b1;
11167 assert_eq!(r, e);
11168 }
11169
11170 #[simd_test(enable = "avx512dq")]
11171 fn test_mm_mask_fpclass_ss_mask() {
11172 let a = _mm_set_ss(f32::INFINITY);
11173 let r = _mm_mask_fpclass_ss_mask::<0x18>(0b0, a);
11174 let e = 0b0;
11175 assert_eq!(r, e);
11176 }
11177}
11178