avx.rs source code [crates/core_arch/src/x86/avx.rs]

1	//! Advanced Vector Extensions (AVX)
2	//!
3	//! The references are:
4	//!
//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
//!   Instruction Set Reference, A-Z][intel64_ref].
//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
//!   System Instructions][amd64_ref].
9	//!
10	//! [Wikipedia][wiki] provides a quick overview of the instructions available.
11	//!
12	//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
13	//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
14	//! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
15
16	use crate::{
17	core_arch::{simd::, simd_llvm::, x86::*},
18	intrinsics,
19	mem::{self, transmute},
20	ptr,
21	};
22
23	#[cfg(test)]
24	use stdarch_test::assert_instr;
25
26	/// Adds packed double-precision (64-bit) floating-point elements
27	/// in `a` and `b`.
28	///
29	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_pd)
30	#[inline]
31	#[target_feature(enable = "avx")]
32	#[cfg_attr(test, assert_instr(vaddpd))]
33	#[stable(feature = "simd_x86", since = "1.27.0")]
34	pub unsafe fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d {
35	simd_add(x:a, y:b)
36	}
37
38	/// Adds packed single-precision (32-bit) floating-point elements in `a` and
39	/// `b`.
40	///
41	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_ps)
42	#[inline]
43	#[target_feature(enable = "avx")]
44	#[cfg_attr(test, assert_instr(vaddps))]
45	#[stable(feature = "simd_x86", since = "1.27.0")]
46	pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 {
47	simd_add(x:a, y:b)
48	}
49
50	/// Computes the bitwise AND of a packed double-precision (64-bit)
51	/// floating-point elements in `a` and `b`.
52	///
53	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_pd)
54	#[inline]
55	#[target_feature(enable = "avx")]
56	// FIXME: Should be 'vandpd' instruction.
57	// See https://github.com/rust-lang/stdarch/issues/71
58	#[cfg_attr(test, assert_instr(vandps))]
59	#[stable(feature = "simd_x86", since = "1.27.0")]
60	pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d {
61	let a: u64x4 = transmute(src:a);
62	let b: u64x4 = transmute(src:b);
63	transmute(src:simd_and(x:a, y:b))
64	}
65
66	/// Computes the bitwise AND of packed single-precision (32-bit) floating-point
67	/// elements in `a` and `b`.
68	///
69	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_ps)
70	#[inline]
71	#[target_feature(enable = "avx")]
72	#[cfg_attr(test, assert_instr(vandps))]
73	#[stable(feature = "simd_x86", since = "1.27.0")]
74	pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
75	let a: u32x8 = transmute(src:a);
76	let b: u32x8 = transmute(src:b);
77	transmute(src:simd_and(x:a, y:b))
78	}
79
80	/// Computes the bitwise OR packed double-precision (64-bit) floating-point
81	/// elements in `a` and `b`.
82	///
83	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_pd)
84	#[inline]
85	#[target_feature(enable = "avx")]
86	// FIXME: should be `vorpd` instruction.
87	// See <https://github.com/rust-lang/stdarch/issues/71>.
88	#[cfg_attr(test, assert_instr(vorps))]
89	#[stable(feature = "simd_x86", since = "1.27.0")]
90	pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d {
91	let a: u64x4 = transmute(src:a);
92	let b: u64x4 = transmute(src:b);
93	transmute(src:simd_or(x:a, y:b))
94	}
95
96	/// Computes the bitwise OR packed single-precision (32-bit) floating-point
97	/// elements in `a` and `b`.
98	///
99	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_ps)
100	#[inline]
101	#[target_feature(enable = "avx")]
102	#[cfg_attr(test, assert_instr(vorps))]
103	#[stable(feature = "simd_x86", since = "1.27.0")]
104	pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
105	let a: u32x8 = transmute(src:a);
106	let b: u32x8 = transmute(src:b);
107	transmute(src:simd_or(x:a, y:b))
108	}
109
110	/// Shuffles double-precision (64-bit) floating-point elements within 128-bit
111	/// lanes using the control in `imm8`.
112	///
113	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_pd)
114	#[inline]
115	#[target_feature(enable = "avx")]
116	#[cfg_attr(test, assert_instr(vshufpd, MASK = `3`))]
117	#[rustc_legacy_const_generics(`2`)]
118	#[stable(feature = "simd_x86", since = "1.27.0")]
119	pub unsafe fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
120	static_assert_uimm_bits!(MASK, `8`);
121	simd_shuffle!(
122	a,
123	b,
124	[
125	MASK as u32 & `0b1`,
126	((MASK as u32 >> `1`) & `0b1`) + `4`,
127	((MASK as u32 >> `2`) & `0b1`) + `2`,
128	((MASK as u32 >> `3`) & `0b1`) + `6`,
129	],
130	)
131	}
132
133	/// Shuffles single-precision (32-bit) floating-point elements in `a` within
134	/// 128-bit lanes using the control in `imm8`.
135	///
136	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_ps)
137	#[inline]
138	#[target_feature(enable = "avx")]
139	#[cfg_attr(test, assert_instr(vshufps, MASK = `3`))]
140	#[rustc_legacy_const_generics(`2`)]
141	#[stable(feature = "simd_x86", since = "1.27.0")]
142	pub unsafe fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
143	static_assert_uimm_bits!(MASK, `8`);
144	simd_shuffle!(
145	a,
146	b,
147	[
148	MASK as u32 & `0b11`,
149	(MASK as u32 >> `2`) & `0b11`,
150	((MASK as u32 >> `4`) & `0b11`) + `8`,
151	((MASK as u32 >> `6`) & `0b11`) + `8`,
152	(MASK as u32 & `0b11`) + `4`,
153	((MASK as u32 >> `2`) & `0b11`) + `4`,
154	((MASK as u32 >> `4`) & `0b11`) + `12`,
155	((MASK as u32 >> `6`) & `0b11`) + `12`,
156	],
157	)
158	}
159
160	/// Computes the bitwise NOT of packed double-precision (64-bit) floating-point
161	/// elements in `a`, and then AND with `b`.
162	///
163	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_pd)
164	#[inline]
165	#[target_feature(enable = "avx")]
166	// FIXME: should be `vandnpd` instruction.
167	#[cfg_attr(test, assert_instr(vandnps))]
168	#[stable(feature = "simd_x86", since = "1.27.0")]
169	pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d {
170	let a: u64x4 = transmute(src:a);
171	let b: u64x4 = transmute(src:b);
172	transmute(src:simd_and(x:simd_xor(u64x4::splat(!(`0_u64`)), a), y:b))
173	}
174
175	/// Computes the bitwise NOT of packed single-precision (32-bit) floating-point
176	/// elements in `a`
177	/// and then AND with `b`.
178	///
179	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_ps)
180	#[inline]
181	#[target_feature(enable = "avx")]
182	#[cfg_attr(test, assert_instr(vandnps))]
183	#[stable(feature = "simd_x86", since = "1.27.0")]
184	pub unsafe fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 {
185	let a: u32x8 = transmute(src:a);
186	let b: u32x8 = transmute(src:b);
187	transmute(src:simd_and(x:simd_xor(u32x8::splat(!(`0_u32`)), a), y:b))
188	}
189
190	/// Compares packed double-precision (64-bit) floating-point elements
191	/// in `a` and `b`, and returns packed maximum values
192	///
193	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_pd)
194	#[inline]
195	#[target_feature(enable = "avx")]
196	#[cfg_attr(test, assert_instr(vmaxpd))]
197	#[stable(feature = "simd_x86", since = "1.27.0")]
198	pub unsafe fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d {
199	vmaxpd(a, b)
200	}
201
202	/// Compares packed single-precision (32-bit) floating-point elements in `a`
203	/// and `b`, and returns packed maximum values
204	///
205	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_ps)
206	#[inline]
207	#[target_feature(enable = "avx")]
208	#[cfg_attr(test, assert_instr(vmaxps))]
209	#[stable(feature = "simd_x86", since = "1.27.0")]
210	pub unsafe fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 {
211	vmaxps(a, b)
212	}
213
214	/// Compares packed double-precision (64-bit) floating-point elements
215	/// in `a` and `b`, and returns packed minimum values
216	///
217	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_pd)
218	#[inline]
219	#[target_feature(enable = "avx")]
220	#[cfg_attr(test, assert_instr(vminpd))]
221	#[stable(feature = "simd_x86", since = "1.27.0")]
222	pub unsafe fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d {
223	vminpd(a, b)
224	}
225
226	/// Compares packed single-precision (32-bit) floating-point elements in `a`
227	/// and `b`, and returns packed minimum values
228	///
229	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_ps)
230	#[inline]
231	#[target_feature(enable = "avx")]
232	#[cfg_attr(test, assert_instr(vminps))]
233	#[stable(feature = "simd_x86", since = "1.27.0")]
234	pub unsafe fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 {
235	vminps(a, b)
236	}
237
238	/// Multiplies packed double-precision (64-bit) floating-point elements
239	/// in `a` and `b`.
240	///
241	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_pd)
242	#[inline]
243	#[target_feature(enable = "avx")]
244	#[cfg_attr(test, assert_instr(vmulpd))]
245	#[stable(feature = "simd_x86", since = "1.27.0")]
246	pub unsafe fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d {
247	simd_mul(x:a, y:b)
248	}
249
250	/// Multiplies packed single-precision (32-bit) floating-point elements in `a` and
251	/// `b`.
252	///
253	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_ps)
254	#[inline]
255	#[target_feature(enable = "avx")]
256	#[cfg_attr(test, assert_instr(vmulps))]
257	#[stable(feature = "simd_x86", since = "1.27.0")]
258	pub unsafe fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 {
259	simd_mul(x:a, y:b)
260	}
261
262	/// Alternatively adds and subtracts packed double-precision (64-bit)
263	/// floating-point elements in `a` to/from packed elements in `b`.
264	///
265	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_addsub_pd)
266	#[inline]
267	#[target_feature(enable = "avx")]
268	#[cfg_attr(test, assert_instr(vaddsubpd))]
269	#[stable(feature = "simd_x86", since = "1.27.0")]
270	pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d {
271	let a: f64x4 = a.as_f64x4();
272	let b: f64x4 = b.as_f64x4();
273	let add: f64x4 = simd_add(x:a, y:b);
274	let sub: f64x4 = simd_sub(x:a, y:b);
275	simd_shuffle!(add, sub, [`4`, `1`, `6`, `3`])
276	}
277
278	/// Alternatively adds and subtracts packed single-precision (32-bit)
279	/// floating-point elements in `a` to/from packed elements in `b`.
280	///
281	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_addsub_ps)
282	#[inline]
283	#[target_feature(enable = "avx")]
284	#[cfg_attr(test, assert_instr(vaddsubps))]
285	#[stable(feature = "simd_x86", since = "1.27.0")]
286	pub unsafe fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 {
287	let a: f32x8 = a.as_f32x8();
288	let b: f32x8 = b.as_f32x8();
289	let add: f32x8 = simd_add(x:a, y:b);
290	let sub: f32x8 = simd_sub(x:a, y:b);
291	simd_shuffle!(add, sub, [`8`, `1`, `10`, `3`, `12`, `5`, `14`, `7`])
292	}
293
294	/// Subtracts packed double-precision (64-bit) floating-point elements in `b`
295	/// from packed elements in `a`.
296	///
297	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_pd)
298	#[inline]
299	#[target_feature(enable = "avx")]
300	#[cfg_attr(test, assert_instr(vsubpd))]
301	#[stable(feature = "simd_x86", since = "1.27.0")]
302	pub unsafe fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d {
303	simd_sub(x:a, y:b)
304	}
305
306	/// Subtracts packed single-precision (32-bit) floating-point elements in `b`
307	/// from packed elements in `a`.
308	///
309	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_ps)
310	#[inline]
311	#[target_feature(enable = "avx")]
312	#[cfg_attr(test, assert_instr(vsubps))]
313	#[stable(feature = "simd_x86", since = "1.27.0")]
314	pub unsafe fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 {
315	simd_sub(x:a, y:b)
316	}
317
318	/// Computes the division of each of the 8 packed 32-bit floating-point elements
319	/// in `a` by the corresponding packed elements in `b`.
320	///
321	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_div_ps)
322	#[inline]
323	#[target_feature(enable = "avx")]
324	#[cfg_attr(test, assert_instr(vdivps))]
325	#[stable(feature = "simd_x86", since = "1.27.0")]
326	pub unsafe fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 {
327	simd_div(x:a, y:b)
328	}
329
330	/// Computes the division of each of the 4 packed 64-bit floating-point elements
331	/// in `a` by the corresponding packed elements in `b`.
332	///
333	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_div_pd)
334	#[inline]
335	#[target_feature(enable = "avx")]
336	#[cfg_attr(test, assert_instr(vdivpd))]
337	#[stable(feature = "simd_x86", since = "1.27.0")]
338	pub unsafe fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d {
339	simd_div(x:a, y:b)
340	}
341
342	/// Rounds packed double-precision (64-bit) floating point elements in `a`
343	/// according to the flag `ROUNDING`. The value of `ROUNDING` may be as follows:
344	///
345	/// - `0x00`: Round to the nearest whole number.
346	/// - `0x01`: Round down, toward negative infinity.
347	/// - `0x02`: Round up, toward positive infinity.
348	/// - `0x03`: Truncate the values.
349	///
350	/// For a complete list of options, check [the LLVM docs][llvm_docs].
351	///
352	/// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
353	///
354	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_round_pd)
355	#[inline]
356	#[target_feature(enable = "avx")]
357	#[cfg_attr(test, assert_instr(vroundpd, ROUNDING = `0x3`))]
358	#[rustc_legacy_const_generics(`1`)]
359	#[stable(feature = "simd_x86", since = "1.27.0")]
360	pub unsafe fn _mm256_round_pd<const ROUNDING: i32>(a: __m256d) -> __m256d {
361	static_assert_uimm_bits!(ROUNDING, `4`);
362	roundpd256(a, ROUNDING)
363	}
364
365	/// Rounds packed double-precision (64-bit) floating point elements in `a`
366	/// toward positive infinity.
367	///
368	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ceil_pd)
369	#[inline]
370	#[target_feature(enable = "avx")]
371	#[cfg_attr(test, assert_instr(vroundpd))]
372	#[stable(feature = "simd_x86", since = "1.27.0")]
373	pub unsafe fn _mm256_ceil_pd(a: __m256d) -> __m256d {
374	simd_ceil(a)
375	}
376
377	/// Rounds packed double-precision (64-bit) floating point elements in `a`
378	/// toward negative infinity.
379	///
380	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_floor_pd)
381	#[inline]
382	#[target_feature(enable = "avx")]
383	#[cfg_attr(test, assert_instr(vroundpd))]
384	#[stable(feature = "simd_x86", since = "1.27.0")]
385	pub unsafe fn _mm256_floor_pd(a: __m256d) -> __m256d {
386	simd_floor(a)
387	}
388
389	/// Rounds packed single-precision (32-bit) floating point elements in `a`
390	/// according to the flag `ROUNDING`. The value of `ROUNDING` may be as follows:
391	///
392	/// - `0x00`: Round to the nearest whole number.
393	/// - `0x01`: Round down, toward negative infinity.
394	/// - `0x02`: Round up, toward positive infinity.
395	/// - `0x03`: Truncate the values.
396	///
397	/// For a complete list of options, check [the LLVM docs][llvm_docs].
398	///
399	/// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
400	///
401	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_round_ps)
402	#[inline]
403	#[target_feature(enable = "avx")]
404	#[cfg_attr(test, assert_instr(vroundps, ROUNDING = `0x00`))]
405	#[rustc_legacy_const_generics(`1`)]
406	#[stable(feature = "simd_x86", since = "1.27.0")]
407	pub unsafe fn _mm256_round_ps<const ROUNDING: i32>(a: __m256) -> __m256 {
408	static_assert_uimm_bits!(ROUNDING, `4`);
409	roundps256(a, ROUNDING)
410	}
411
412	/// Rounds packed single-precision (32-bit) floating point elements in `a`
413	/// toward positive infinity.
414	///
415	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ceil_ps)
416	#[inline]
417	#[target_feature(enable = "avx")]
418	#[cfg_attr(test, assert_instr(vroundps))]
419	#[stable(feature = "simd_x86", since = "1.27.0")]
420	pub unsafe fn _mm256_ceil_ps(a: __m256) -> __m256 {
421	simd_ceil(a)
422	}
423
424	/// Rounds packed single-precision (32-bit) floating point elements in `a`
425	/// toward negative infinity.
426	///
427	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_floor_ps)
428	#[inline]
429	#[target_feature(enable = "avx")]
430	#[cfg_attr(test, assert_instr(vroundps))]
431	#[stable(feature = "simd_x86", since = "1.27.0")]
432	pub unsafe fn _mm256_floor_ps(a: __m256) -> __m256 {
433	simd_floor(a)
434	}
435
436	/// Returns the square root of packed single-precision (32-bit) floating point
437	/// elements in `a`.
438	///
439	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sqrt_ps)
440	#[inline]
441	#[target_feature(enable = "avx")]
442	#[cfg_attr(test, assert_instr(vsqrtps))]
443	#[stable(feature = "simd_x86", since = "1.27.0")]
444	pub unsafe fn _mm256_sqrt_ps(a: __m256) -> __m256 {
445	sqrtps256(a)
446	}
447
448	/// Returns the square root of packed double-precision (64-bit) floating point
449	/// elements in `a`.
450	///
451	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sqrt_pd)
452	#[inline]
453	#[target_feature(enable = "avx")]
454	#[cfg_attr(test, assert_instr(vsqrtpd))]
455	#[stable(feature = "simd_x86", since = "1.27.0")]
456	pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
457	simd_fsqrt(a)
458	}
459
460	/// Blends packed double-precision (64-bit) floating-point elements from
461	/// `a` and `b` using control mask `imm8`.
462	///
463	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_pd)
464	#[inline]
465	#[target_feature(enable = "avx")]
466	// Note: LLVM7 prefers single-precision blend instructions when
467	// possible, see: https://bugs.llvm.org/show_bug.cgi?id=38194
468	// #[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))]
469	#[cfg_attr(test, assert_instr(vblendps, IMM4 = `9`))]
470	#[rustc_legacy_const_generics(`2`)]
471	#[stable(feature = "simd_x86", since = "1.27.0")]
472	pub unsafe fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256d {
473	static_assert_uimm_bits!(IMM4, `4`);
474	simd_shuffle!(
475	a,
476	b,
477	[
478	((IMM4 as u32 >> `0`) & `1`) * `4` + `0`,
479	((IMM4 as u32 >> `1`) & `1`) * `4` + `1`,
480	((IMM4 as u32 >> `2`) & `1`) * `4` + `2`,
481	((IMM4 as u32 >> `3`) & `1`) * `4` + `3`,
482	],
483	)
484	}
485
486	/// Blends packed single-precision (32-bit) floating-point elements from
487	/// `a` and `b` using control mask `imm8`.
488	///
489	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_ps)
490	#[inline]
491	#[target_feature(enable = "avx")]
492	#[cfg_attr(test, assert_instr(vblendps, IMM8 = `9`))]
493	#[rustc_legacy_const_generics(`2`)]
494	#[stable(feature = "simd_x86", since = "1.27.0")]
495	pub unsafe fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
496	static_assert_uimm_bits!(IMM8, `8`);
497	simd_shuffle!(
498	a,
499	b,
500	[
501	((IMM8 as u32 >> `0`) & `1`) * `8` + `0`,
502	((IMM8 as u32 >> `1`) & `1`) * `8` + `1`,
503	((IMM8 as u32 >> `2`) & `1`) * `8` + `2`,
504	((IMM8 as u32 >> `3`) & `1`) * `8` + `3`,
505	((IMM8 as u32 >> `4`) & `1`) * `8` + `4`,
506	((IMM8 as u32 >> `5`) & `1`) * `8` + `5`,
507	((IMM8 as u32 >> `6`) & `1`) * `8` + `6`,
508	((IMM8 as u32 >> `7`) & `1`) * `8` + `7`,
509	],
510	)
511	}
512
513	/// Blends packed double-precision (64-bit) floating-point elements from
514	/// `a` and `b` using `c` as a mask.
515	///
516	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_pd)
517	#[inline]
518	#[target_feature(enable = "avx")]
519	#[cfg_attr(test, assert_instr(vblendvpd))]
520	#[stable(feature = "simd_x86", since = "1.27.0")]
521	pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
522	let mask: i64x4 = simd_lt(x:transmute::<_, i64x4>(c), y:i64x4::splat(`0`));
523	transmute(src:simd_select(m:mask, a:b.as_f64x4(), b:a.as_f64x4()))
524	}
525
526	/// Blends packed single-precision (32-bit) floating-point elements from
527	/// `a` and `b` using `c` as a mask.
528	///
529	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_ps)
530	#[inline]
531	#[target_feature(enable = "avx")]
532	#[cfg_attr(test, assert_instr(vblendvps))]
533	#[stable(feature = "simd_x86", since = "1.27.0")]
534	pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
535	let mask: i32x8 = simd_lt(x:transmute::<_, i32x8>(c), y:i32x8::splat(`0`));
536	transmute(src:simd_select(m:mask, a:b.as_f32x8(), b:a.as_f32x8()))
537	}
538
539	/// Conditionally multiplies the packed single-precision (32-bit) floating-point
540	/// elements in `a` and `b` using the high 4 bits in `imm8`,
541	/// sum the four products, and conditionally return the sum
542	/// using the low 4 bits of `imm8`.
543	///
544	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dp_ps)
545	#[inline]
546	#[target_feature(enable = "avx")]
547	#[cfg_attr(test, assert_instr(vdpps, IMM8 = `0x0`))]
548	#[rustc_legacy_const_generics(`2`)]
549	#[stable(feature = "simd_x86", since = "1.27.0")]
550	pub unsafe fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
551	static_assert_uimm_bits!(IMM8, `8`);
552	vdpps(a, b, IMM8)
553	}
554
555	/// Horizontal addition of adjacent pairs in the two packed vectors
556	/// of 4 64-bit floating points `a` and `b`.
557	/// In the result, sums of elements from `a` are returned in even locations,
558	/// while sums of elements from `b` are returned in odd locations.
559	///
560	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_pd)
561	#[inline]
562	#[target_feature(enable = "avx")]
563	#[cfg_attr(test, assert_instr(vhaddpd))]
564	#[stable(feature = "simd_x86", since = "1.27.0")]
565	pub unsafe fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d {
566	vhaddpd(a, b)
567	}
568
569	/// Horizontal addition of adjacent pairs in the two packed vectors
570	/// of 8 32-bit floating points `a` and `b`.
571	/// In the result, sums of elements from `a` are returned in locations of
572	/// indices 0, 1, 4, 5; while sums of elements from `b` are locations
573	/// 2, 3, 6, 7.
574	///
575	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_ps)
576	#[inline]
577	#[target_feature(enable = "avx")]
578	#[cfg_attr(test, assert_instr(vhaddps))]
579	#[stable(feature = "simd_x86", since = "1.27.0")]
580	pub unsafe fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 {
581	vhaddps(a, b)
582	}
583
584	/// Horizontal subtraction of adjacent pairs in the two packed vectors
585	/// of 4 64-bit floating points `a` and `b`.
586	/// In the result, sums of elements from `a` are returned in even locations,
587	/// while sums of elements from `b` are returned in odd locations.
588	///
589	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_pd)
590	#[inline]
591	#[target_feature(enable = "avx")]
592	#[cfg_attr(test, assert_instr(vhsubpd))]
593	#[stable(feature = "simd_x86", since = "1.27.0")]
594	pub unsafe fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d {
595	vhsubpd(a, b)
596	}
597
598	/// Horizontal subtraction of adjacent pairs in the two packed vectors
599	/// of 8 32-bit floating points `a` and `b`.
600	/// In the result, sums of elements from `a` are returned in locations of
601	/// indices 0, 1, 4, 5; while sums of elements from `b` are locations
602	/// 2, 3, 6, 7.
603	///
604	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_ps)
605	#[inline]
606	#[target_feature(enable = "avx")]
607	#[cfg_attr(test, assert_instr(vhsubps))]
608	#[stable(feature = "simd_x86", since = "1.27.0")]
609	pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
610	vhsubps(a, b)
611	}
612
613	/// Computes the bitwise XOR of packed double-precision (64-bit) floating-point
614	/// elements in `a` and `b`.
615	///
616	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_pd)
617	#[inline]
618	#[target_feature(enable = "avx")]
619	// FIXME Should be 'vxorpd' instruction.
620	#[cfg_attr(test, assert_instr(vxorps))]
621	#[stable(feature = "simd_x86", since = "1.27.0")]
622	pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d {
623	let a: u64x4 = transmute(src:a);
624	let b: u64x4 = transmute(src:b);
625	transmute(src:simd_xor(x:a, y:b))
626	}
627
628	/// Computes the bitwise XOR of packed single-precision (32-bit) floating-point
629	/// elements in `a` and `b`.
630	///
631	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_ps)
632	#[inline]
633	#[target_feature(enable = "avx")]
634	#[cfg_attr(test, assert_instr(vxorps))]
635	#[stable(feature = "simd_x86", since = "1.27.0")]
636	pub unsafe fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 {
637	let a: u32x8 = transmute(src:a);
638	let b: u32x8 = transmute(src:b);
639	transmute(src:simd_xor(x:a, y:b))
640	}
641
642	/// Equal (ordered, non-signaling)
643	#[stable(feature = "simd_x86", since = "1.27.0")]
644	pub const _CMP_EQ_OQ: i32 = `0x00`;
645	/// Less-than (ordered, signaling)
646	#[stable(feature = "simd_x86", since = "1.27.0")]
647	pub const _CMP_LT_OS: i32 = `0x01`;
648	/// Less-than-or-equal (ordered, signaling)
649	#[stable(feature = "simd_x86", since = "1.27.0")]
650	pub const _CMP_LE_OS: i32 = `0x02`;
651	/// Unordered (non-signaling)
652	#[stable(feature = "simd_x86", since = "1.27.0")]
653	pub const _CMP_UNORD_Q: i32 = `0x03`;
654	/// Not-equal (unordered, non-signaling)
655	#[stable(feature = "simd_x86", since = "1.27.0")]
656	pub const _CMP_NEQ_UQ: i32 = `0x04`;
657	/// Not-less-than (unordered, signaling)
658	#[stable(feature = "simd_x86", since = "1.27.0")]
659	pub const _CMP_NLT_US: i32 = `0x05`;
660	/// Not-less-than-or-equal (unordered, signaling)
661	#[stable(feature = "simd_x86", since = "1.27.0")]
662	pub const _CMP_NLE_US: i32 = `0x06`;
663	/// Ordered (non-signaling)
664	#[stable(feature = "simd_x86", since = "1.27.0")]
665	pub const _CMP_ORD_Q: i32 = `0x07`;
666	/// Equal (unordered, non-signaling)
667	#[stable(feature = "simd_x86", since = "1.27.0")]
668	pub const _CMP_EQ_UQ: i32 = `0x08`;
669	/// Not-greater-than-or-equal (unordered, signaling)
670	#[stable(feature = "simd_x86", since = "1.27.0")]
671	pub const _CMP_NGE_US: i32 = `0x09`;
672	/// Not-greater-than (unordered, signaling)
673	#[stable(feature = "simd_x86", since = "1.27.0")]
674	pub const _CMP_NGT_US: i32 = `0x0a`;
675	/// False (ordered, non-signaling)
676	#[stable(feature = "simd_x86", since = "1.27.0")]
677	pub const _CMP_FALSE_OQ: i32 = `0x0b`;
678	/// Not-equal (ordered, non-signaling)
679	#[stable(feature = "simd_x86", since = "1.27.0")]
680	pub const _CMP_NEQ_OQ: i32 = `0x0c`;
681	/// Greater-than-or-equal (ordered, signaling)
682	#[stable(feature = "simd_x86", since = "1.27.0")]
683	pub const _CMP_GE_OS: i32 = `0x0d`;
684	/// Greater-than (ordered, signaling)
685	#[stable(feature = "simd_x86", since = "1.27.0")]
686	pub const _CMP_GT_OS: i32 = `0x0e`;
687	/// True (unordered, non-signaling)
688	#[stable(feature = "simd_x86", since = "1.27.0")]
689	pub const _CMP_TRUE_UQ: i32 = `0x0f`;
690	/// Equal (ordered, signaling)
691	#[stable(feature = "simd_x86", since = "1.27.0")]
692	pub const _CMP_EQ_OS: i32 = `0x10`;
693	/// Less-than (ordered, non-signaling)
694	#[stable(feature = "simd_x86", since = "1.27.0")]
695	pub const _CMP_LT_OQ: i32 = `0x11`;
696	/// Less-than-or-equal (ordered, non-signaling)
697	#[stable(feature = "simd_x86", since = "1.27.0")]
698	pub const _CMP_LE_OQ: i32 = `0x12`;
699	/// Unordered (signaling)
700	#[stable(feature = "simd_x86", since = "1.27.0")]
701	pub const _CMP_UNORD_S: i32 = `0x13`;
702	/// Not-equal (unordered, signaling)
703	#[stable(feature = "simd_x86", since = "1.27.0")]
704	pub const _CMP_NEQ_US: i32 = `0x14`;
705	/// Not-less-than (unordered, non-signaling)
706	#[stable(feature = "simd_x86", since = "1.27.0")]
707	pub const _CMP_NLT_UQ: i32 = `0x15`;
708	/// Not-less-than-or-equal (unordered, non-signaling)
709	#[stable(feature = "simd_x86", since = "1.27.0")]
710	pub const _CMP_NLE_UQ: i32 = `0x16`;
711	/// Ordered (signaling)
712	#[stable(feature = "simd_x86", since = "1.27.0")]
713	pub const _CMP_ORD_S: i32 = `0x17`;
714	/// Equal (unordered, signaling)
715	#[stable(feature = "simd_x86", since = "1.27.0")]
716	pub const _CMP_EQ_US: i32 = `0x18`;
717	/// Not-greater-than-or-equal (unordered, non-signaling)
718	#[stable(feature = "simd_x86", since = "1.27.0")]
719	pub const _CMP_NGE_UQ: i32 = `0x19`;
720	/// Not-greater-than (unordered, non-signaling)
721	#[stable(feature = "simd_x86", since = "1.27.0")]
722	pub const _CMP_NGT_UQ: i32 = `0x1a`;
723	/// False (ordered, signaling)
724	#[stable(feature = "simd_x86", since = "1.27.0")]
725	pub const _CMP_FALSE_OS: i32 = `0x1b`;
726	/// Not-equal (ordered, signaling)
727	#[stable(feature = "simd_x86", since = "1.27.0")]
728	pub const _CMP_NEQ_OS: i32 = `0x1c`;
729	/// Greater-than-or-equal (ordered, non-signaling)
730	#[stable(feature = "simd_x86", since = "1.27.0")]
731	pub const _CMP_GE_OQ: i32 = `0x1d`;
732	/// Greater-than (ordered, non-signaling)
733	#[stable(feature = "simd_x86", since = "1.27.0")]
734	pub const _CMP_GT_OQ: i32 = `0x1e`;
735	/// True (unordered, signaling)
736	#[stable(feature = "simd_x86", since = "1.27.0")]
737	pub const _CMP_TRUE_US: i32 = `0x1f`;
738
739	/// Compares packed double-precision (64-bit) floating-point
740	/// elements in `a` and `b` based on the comparison operand
741	/// specified by `IMM5`.
742	///
743	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd)
744	#[inline]
745	#[target_feature(enable = "avx,sse2")]
746	#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = `0`))] // TODO Validate vcmppd
747	#[rustc_legacy_const_generics(`2`)]
748	#[stable(feature = "simd_x86", since = "1.27.0")]
749	pub unsafe fn _mm_cmp_pd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
750	static_assert_uimm_bits!(IMM5, `5`);
751	vcmppd(a, b, imm8:const { IMM5 as i8 })
752	}
753
754	/// Compares packed double-precision (64-bit) floating-point
755	/// elements in `a` and `b` based on the comparison operand
756	/// specified by `IMM5`.
757	///
758	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd)
759	#[inline]
760	#[target_feature(enable = "avx")]
761	#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = `0`))] // TODO Validate vcmppd
762	#[rustc_legacy_const_generics(`2`)]
763	#[stable(feature = "simd_x86", since = "1.27.0")]
764	pub unsafe fn _mm256_cmp_pd<const IMM5: i32>(a: __m256d, b: __m256d) -> __m256d {
765	static_assert_uimm_bits!(IMM5, `5`);
766	vcmppd256(a, b, IMM5 as u8)
767	}
768
769	/// Compares packed single-precision (32-bit) floating-point
770	/// elements in `a` and `b` based on the comparison operand
771	/// specified by `IMM5`.
772	///
773	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps)
774	#[inline]
775	#[target_feature(enable = "avx,sse")]
776	#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = `0`))] // TODO Validate vcmpps
777	#[rustc_legacy_const_generics(`2`)]
778	#[stable(feature = "simd_x86", since = "1.27.0")]
779	pub unsafe fn _mm_cmp_ps<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
780	static_assert_uimm_bits!(IMM5, `5`);
781	vcmpps(a, b, imm8:const { IMM5 as i8 })
782	}
783
784	/// Compares packed single-precision (32-bit) floating-point
785	/// elements in `a` and `b` based on the comparison operand
786	/// specified by `IMM5`.
787	///
788	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps)
789	#[inline]
790	#[target_feature(enable = "avx")]
791	#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = `0`))] // TODO Validate vcmpps
792	#[rustc_legacy_const_generics(`2`)]
793	#[stable(feature = "simd_x86", since = "1.27.0")]
794	pub unsafe fn _mm256_cmp_ps<const IMM5: i32>(a: __m256, b: __m256) -> __m256 {
795	static_assert_uimm_bits!(IMM5, `5`);
796	vcmpps256(a, b, imm8:const { IMM5 as u8 })
797	}
798
799	/// Compares the lower double-precision (64-bit) floating-point element in
800	/// `a` and `b` based on the comparison operand specified by `IMM5`,
801	/// store the result in the lower element of returned vector,
802	/// and copies the upper element from `a` to the upper element of returned
803	/// vector.
804	///
805	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd)
806	#[inline]
807	#[target_feature(enable = "avx,sse2")]
808	#[cfg_attr(test, assert_instr(vcmpeqsd, IMM5 = `0`))] // TODO Validate vcmpsd
809	#[rustc_legacy_const_generics(`2`)]
810	#[stable(feature = "simd_x86", since = "1.27.0")]
811	pub unsafe fn _mm_cmp_sd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
812	static_assert_uimm_bits!(IMM5, `5`);
813	vcmpsd(a, b, IMM5 as i8)
814	}
815
816	/// Compares the lower single-precision (32-bit) floating-point element in
817	/// `a` and `b` based on the comparison operand specified by `IMM5`,
818	/// store the result in the lower element of returned vector,
819	/// and copies the upper 3 packed elements from `a` to the upper elements of
820	/// returned vector.
821	///
822	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss)
823	#[inline]
824	#[target_feature(enable = "avx,sse")]
825	#[cfg_attr(test, assert_instr(vcmpeqss, IMM5 = `0`))] // TODO Validate vcmpss
826	#[rustc_legacy_const_generics(`2`)]
827	#[stable(feature = "simd_x86", since = "1.27.0")]
828	pub unsafe fn _mm_cmp_ss<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
829	static_assert_uimm_bits!(IMM5, `5`);
830	vcmpss(a, b, IMM5 as i8)
831	}
832
833	/// Converts packed 32-bit integers in `a` to packed double-precision (64-bit)
834	/// floating-point elements.
835	///
836	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_pd)
837	#[inline]
838	#[target_feature(enable = "avx")]
839	#[cfg_attr(test, assert_instr(vcvtdq2pd))]
840	#[stable(feature = "simd_x86", since = "1.27.0")]
841	pub unsafe fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d {
842	simd_cast(a.as_i32x4())
843	}
844
845	/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
846	/// floating-point elements.
847	///
848	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_ps)
849	#[inline]
850	#[target_feature(enable = "avx")]
851	#[cfg_attr(test, assert_instr(vcvtdq2ps))]
852	#[stable(feature = "simd_x86", since = "1.27.0")]
853	pub unsafe fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 {
854	vcvtdq2ps(a.as_i32x8())
855	}
856
857	/// Converts packed double-precision (64-bit) floating-point elements in `a`
858	/// to packed single-precision (32-bit) floating-point elements.
859	///
860	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_ps)
861	#[inline]
862	#[target_feature(enable = "avx")]
863	#[cfg_attr(test, assert_instr(vcvtpd2ps))]
864	#[stable(feature = "simd_x86", since = "1.27.0")]
865	pub unsafe fn _mm256_cvtpd_ps(a: __m256d) -> __m128 {
866	vcvtpd2ps(a)
867	}
868
869	/// Converts packed single-precision (32-bit) floating-point elements in `a`
870	/// to packed 32-bit integers.
871	///
872	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epi32)
873	#[inline]
874	#[target_feature(enable = "avx")]
875	#[cfg_attr(test, assert_instr(vcvtps2dq))]
876	#[stable(feature = "simd_x86", since = "1.27.0")]
877	pub unsafe fn _mm256_cvtps_epi32(a: __m256) -> __m256i {
878	transmute(src:vcvtps2dq(a))
879	}
880
881	/// Converts packed single-precision (32-bit) floating-point elements in `a`
882	/// to packed double-precision (64-bit) floating-point elements.
883	///
884	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_pd)
885	#[inline]
886	#[target_feature(enable = "avx")]
887	#[cfg_attr(test, assert_instr(vcvtps2pd))]
888	#[stable(feature = "simd_x86", since = "1.27.0")]
889	pub unsafe fn _mm256_cvtps_pd(a: __m128) -> __m256d {
890	simd_cast(a)
891	}
892
893	/// Converts packed double-precision (64-bit) floating-point elements in `a`
894	/// to packed 32-bit integers with truncation.
895	///
896	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epi32)
897	#[inline]
898	#[target_feature(enable = "avx")]
899	#[cfg_attr(test, assert_instr(vcvttpd2dq))]
900	#[stable(feature = "simd_x86", since = "1.27.0")]
901	pub unsafe fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i {
902	transmute(src:vcvttpd2dq(a))
903	}
904
905	/// Converts packed double-precision (64-bit) floating-point elements in `a`
906	/// to packed 32-bit integers.
907	///
908	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epi32)
909	#[inline]
910	#[target_feature(enable = "avx")]
911	#[cfg_attr(test, assert_instr(vcvtpd2dq))]
912	#[stable(feature = "simd_x86", since = "1.27.0")]
913	pub unsafe fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i {
914	transmute(src:vcvtpd2dq(a))
915	}
916
917	/// Converts packed single-precision (32-bit) floating-point elements in `a`
918	/// to packed 32-bit integers with truncation.
919	///
920	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epi32)
921	#[inline]
922	#[target_feature(enable = "avx")]
923	#[cfg_attr(test, assert_instr(vcvttps2dq))]
924	#[stable(feature = "simd_x86", since = "1.27.0")]
925	pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i {
926	transmute(src:vcvttps2dq(a))
927	}
928
929	/// Extracts 128 bits (composed of 4 packed single-precision (32-bit)
930	/// floating-point elements) from `a`, selected with `imm8`.
931	///
932	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf128_ps)
933	#[inline]
934	#[target_feature(enable = "avx")]
935	#[cfg_attr(
936	all(test, not(target_os = "windows")),
937	assert_instr(vextractf128, IMM1 = `1`)
938	)]
939	#[rustc_legacy_const_generics(`1`)]
940	#[stable(feature = "simd_x86", since = "1.27.0")]
941	pub unsafe fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128 {
942	static_assert_uimm_bits!(IMM1, `1`);
943	simd_shuffle!(
944	a,
945	_mm256_undefined_ps(),
946	[[`0`, `1`, `2`, `3`], [`4`, `5`, `6`, `7`]][IMM1 as usize],
947	)
948	}
949
950	/// Extracts 128 bits (composed of 2 packed double-precision (64-bit)
951	/// floating-point elements) from `a`, selected with `imm8`.
952	///
953	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf128_pd)
954	#[inline]
955	#[target_feature(enable = "avx")]
956	#[cfg_attr(
957	all(test, not(target_os = "windows")),
958	assert_instr(vextractf128, IMM1 = `1`)
959	)]
960	#[rustc_legacy_const_generics(`1`)]
961	#[stable(feature = "simd_x86", since = "1.27.0")]
962	pub unsafe fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
963	static_assert_uimm_bits!(IMM1, `1`);
964	simd_shuffle!(a, _mm256_undefined_pd(), [[`0`, `1`], [`2`, `3`]][IMM1 as usize])
965	}
966
967	/// Extracts 128 bits (composed of integer data) from `a`, selected with `imm8`.
968	///
969	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf128_si256)
970	#[inline]
971	#[target_feature(enable = "avx")]
972	#[cfg_attr(
973	all(test, not(target_os = "windows")),
974	assert_instr(vextractf128, IMM1 = `1`)
975	)]
976	#[rustc_legacy_const_generics(`1`)]
977	#[stable(feature = "simd_x86", since = "1.27.0")]
978	pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
979	static_assert_uimm_bits!(IMM1, `1`);
980	let dst: i64x2 = simd_shuffle!(
981	a.as_i64x4(),
982	_mm256_undefined_si256().as_i64x4(),
983	[[`0`, `1`], [`2`, `3`]][IMM1 as usize],
984	);
985	transmute(src:dst)
986	}
987
988	/// Zeroes the contents of all XMM or YMM registers.
989	///
990	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroall)
991	#[inline]
992	#[target_feature(enable = "avx")]
993	#[cfg_attr(test, assert_instr(vzeroall))]
994	#[stable(feature = "simd_x86", since = "1.27.0")]
995	pub unsafe fn _mm256_zeroall() {
996	vzeroall()
997	}
998
999	/// Zeroes the upper 128 bits of all YMM registers;
1000	/// the lower 128-bits of the registers are unmodified.
1001	///
1002	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroupper)
1003	#[inline]
1004	#[target_feature(enable = "avx")]
1005	#[cfg_attr(test, assert_instr(vzeroupper))]
1006	#[stable(feature = "simd_x86", since = "1.27.0")]
1007	pub unsafe fn _mm256_zeroupper() {
1008	vzeroupper()
1009	}
1010
1011	/// Shuffles single-precision (32-bit) floating-point elements in `a`
1012	/// within 128-bit lanes using the control in `b`.
1013	///
1014	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar_ps)
1015	#[inline]
1016	#[target_feature(enable = "avx")]
1017	#[cfg_attr(test, assert_instr(vpermilps))]
1018	#[stable(feature = "simd_x86", since = "1.27.0")]
1019	pub unsafe fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 {
1020	vpermilps256(a, b:b.as_i32x8())
1021	}
1022
1023	/// Shuffles single-precision (32-bit) floating-point elements in `a`
1024	/// using the control in `b`.
1025	///
1026	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutevar_ps)
1027	#[inline]
1028	#[target_feature(enable = "avx")]
1029	#[cfg_attr(test, assert_instr(vpermilps))]
1030	#[stable(feature = "simd_x86", since = "1.27.0")]
1031	pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
1032	vpermilps(a, b:b.as_i32x4())
1033	}
1034
1035	/// Shuffles single-precision (32-bit) floating-point elements in `a`
1036	/// within 128-bit lanes using the control in `imm8`.
1037	///
1038	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute_ps)
1039	#[inline]
1040	#[target_feature(enable = "avx")]
1041	#[cfg_attr(test, assert_instr(vshufps, IMM8 = `9`))]
1042	#[rustc_legacy_const_generics(`1`)]
1043	#[stable(feature = "simd_x86", since = "1.27.0")]
1044	pub unsafe fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256 {
1045	static_assert_uimm_bits!(IMM8, `8`);
1046	simd_shuffle!(
1047	a,
1048	_mm256_undefined_ps(),
1049	[
1050	(IMM8 as u32 >> `0`) & `0b11`,
1051	(IMM8 as u32 >> `2`) & `0b11`,
1052	(IMM8 as u32 >> `4`) & `0b11`,
1053	(IMM8 as u32 >> `6`) & `0b11`,
1054	((IMM8 as u32 >> `0`) & `0b11`) + `4`,
1055	((IMM8 as u32 >> `2`) & `0b11`) + `4`,
1056	((IMM8 as u32 >> `4`) & `0b11`) + `4`,
1057	((IMM8 as u32 >> `6`) & `0b11`) + `4`,
1058	],
1059	)
1060	}
1061
1062	/// Shuffles single-precision (32-bit) floating-point elements in `a`
1063	/// using the control in `imm8`.
1064	///
1065	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permute_ps)
1066	#[inline]
1067	#[target_feature(enable = "avx,sse")]
1068	#[cfg_attr(test, assert_instr(vshufps, IMM8 = `9`))]
1069	#[rustc_legacy_const_generics(`1`)]
1070	#[stable(feature = "simd_x86", since = "1.27.0")]
1071	pub unsafe fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128 {
1072	static_assert_uimm_bits!(IMM8, `8`);
1073	simd_shuffle!(
1074	a,
1075	_mm_undefined_ps(),
1076	[
1077	(IMM8 as u32 >> `0`) & `0b11`,
1078	(IMM8 as u32 >> `2`) & `0b11`,
1079	(IMM8 as u32 >> `4`) & `0b11`,
1080	(IMM8 as u32 >> `6`) & `0b11`,
1081	],
1082	)
1083	}
1084
1085	/// Shuffles double-precision (64-bit) floating-point elements in `a`
1086	/// within 256-bit lanes using the control in `b`.
1087	///
1088	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar_pd)
1089	#[inline]
1090	#[target_feature(enable = "avx")]
1091	#[cfg_attr(test, assert_instr(vpermilpd))]
1092	#[stable(feature = "simd_x86", since = "1.27.0")]
1093	pub unsafe fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d {
1094	vpermilpd256(a, b:b.as_i64x4())
1095	}
1096
1097	/// Shuffles double-precision (64-bit) floating-point elements in `a`
1098	/// using the control in `b`.
1099	///
1100	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutevar_pd)
1101	#[inline]
1102	#[target_feature(enable = "avx")]
1103	#[cfg_attr(test, assert_instr(vpermilpd))]
1104	#[stable(feature = "simd_x86", since = "1.27.0")]
1105	pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
1106	vpermilpd(a, b:b.as_i64x2())
1107	}
1108
1109	/// Shuffles double-precision (64-bit) floating-point elements in `a`
1110	/// within 128-bit lanes using the control in `imm8`.
1111	///
1112	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute_pd)
1113	#[inline]
1114	#[target_feature(enable = "avx")]
1115	#[cfg_attr(test, assert_instr(vshufpd, IMM4 = `0x1`))]
1116	#[rustc_legacy_const_generics(`1`)]
1117	#[stable(feature = "simd_x86", since = "1.27.0")]
1118	pub unsafe fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d {
1119	static_assert_uimm_bits!(IMM4, `4`);
1120	simd_shuffle!(
1121	a,
1122	_mm256_undefined_pd(),
1123	[
1124	((IMM4 as u32 >> `0`) & `1`),
1125	((IMM4 as u32 >> `1`) & `1`),
1126	((IMM4 as u32 >> `2`) & `1`) + `2`,
1127	((IMM4 as u32 >> `3`) & `1`) + `2`,
1128	],
1129	)
1130	}
1131
1132	/// Shuffles double-precision (64-bit) floating-point elements in `a`
1133	/// using the control in `imm8`.
1134	///
1135	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permute_pd)
1136	#[inline]
1137	#[target_feature(enable = "avx,sse2")]
1138	#[cfg_attr(test, assert_instr(vshufpd, IMM2 = `0x1`))]
1139	#[rustc_legacy_const_generics(`1`)]
1140	#[stable(feature = "simd_x86", since = "1.27.0")]
1141	pub unsafe fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d {
1142	static_assert_uimm_bits!(IMM2, `2`);
1143	simd_shuffle!(
1144	a,
1145	_mm_undefined_pd(),
1146	[(IMM2 as u32) & `1`, (IMM2 as u32 >> `1`) & `1`],
1147	)
1148	}
1149
1150	/// Shuffles 256 bits (composed of 8 packed single-precision (32-bit)
1151	/// floating-point elements) selected by `imm8` from `a` and `b`.
1152	///
1153	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_ps)
1154	#[inline]
1155	#[target_feature(enable = "avx")]
1156	#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = `0x5`))]
1157	#[rustc_legacy_const_generics(`2`)]
1158	#[stable(feature = "simd_x86", since = "1.27.0")]
1159	pub unsafe fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
1160	static_assert_uimm_bits!(IMM8, `8`);
1161	vperm2f128ps256(a, b, IMM8 as i8)
1162	}
1163
1164	/// Shuffles 256 bits (composed of 4 packed double-precision (64-bit)
1165	/// floating-point elements) selected by `imm8` from `a` and `b`.
1166	///
1167	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_pd)
1168	#[inline]
1169	#[target_feature(enable = "avx")]
1170	#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = `0x31`))]
1171	#[rustc_legacy_const_generics(`2`)]
1172	#[stable(feature = "simd_x86", since = "1.27.0")]
1173	pub unsafe fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
1174	static_assert_uimm_bits!(IMM8, `8`);
1175	vperm2f128pd256(a, b, IMM8 as i8)
1176	}
1177
1178	/// Shuffles 128-bits (composed of integer data) selected by `imm8`
1179	/// from `a` and `b`.
1180	///
1181	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_si256)
1182	#[inline]
1183	#[target_feature(enable = "avx")]
1184	#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = `0x31`))]
1185	#[rustc_legacy_const_generics(`2`)]
1186	#[stable(feature = "simd_x86", since = "1.27.0")]
1187	pub unsafe fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1188	static_assert_uimm_bits!(IMM8, `8`);
1189	transmute(src:vperm2f128si256(a:a.as_i32x8(), b:b.as_i32x8(), IMM8 as i8))
1190	}
1191
1192	/// Broadcasts a single-precision (32-bit) floating-point element from memory
1193	/// to all elements of the returned vector.
1194	///
1195	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_ss)
1196	#[inline]
1197	#[target_feature(enable = "avx")]
1198	#[cfg_attr(test, assert_instr(vbroadcastss))]
1199	#[stable(feature = "simd_x86", since = "1.27.0")]
1200	#[allow(clippy::trivially_copy_pass_by_ref)]
1201	pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 {
1202	_mm256_set1_ps(*f)
1203	}
1204
1205	/// Broadcasts a single-precision (32-bit) floating-point element from memory
1206	/// to all elements of the returned vector.
1207	///
1208	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcast_ss)
1209	#[inline]
1210	#[target_feature(enable = "avx")]
1211	#[cfg_attr(test, assert_instr(vbroadcastss))]
1212	#[stable(feature = "simd_x86", since = "1.27.0")]
1213	#[allow(clippy::trivially_copy_pass_by_ref)]
1214	pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 {
1215	_mm_set1_ps(*f)
1216	}
1217
1218	/// Broadcasts a double-precision (64-bit) floating-point element from memory
1219	/// to all elements of the returned vector.
1220	///
1221	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_sd)
1222	#[inline]
1223	#[target_feature(enable = "avx")]
1224	#[cfg_attr(test, assert_instr(vbroadcastsd))]
1225	#[stable(feature = "simd_x86", since = "1.27.0")]
1226	#[allow(clippy::trivially_copy_pass_by_ref)]
1227	pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d {
1228	_mm256_set1_pd(*f)
1229	}
1230
1231	/// Broadcasts 128 bits from memory (composed of 4 packed single-precision
1232	/// (32-bit) floating-point elements) to all elements of the returned vector.
1233	///
1234	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_ps)
1235	#[inline]
1236	#[target_feature(enable = "avx")]
1237	#[cfg_attr(test, assert_instr(vbroadcastf128))]
1238	#[stable(feature = "simd_x86", since = "1.27.0")]
1239	pub unsafe fn _mm256_broadcast_ps(a: &__m128) -> __m256 {
1240	vbroadcastf128ps256(a)
1241	}
1242
1243	/// Broadcasts 128 bits from memory (composed of 2 packed double-precision
1244	/// (64-bit) floating-point elements) to all elements of the returned vector.
1245	///
1246	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_pd)
1247	#[inline]
1248	#[target_feature(enable = "avx")]
1249	#[cfg_attr(test, assert_instr(vbroadcastf128))]
1250	#[stable(feature = "simd_x86", since = "1.27.0")]
1251	pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
1252	vbroadcastf128pd256(a)
1253	}
1254
1255	/// Copies `a` to result, then inserts 128 bits (composed of 4 packed
1256	/// single-precision (32-bit) floating-point elements) from `b` into result
1257	/// at the location specified by `imm8`.
1258	///
1259	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf128_ps)
1260	#[inline]
1261	#[target_feature(enable = "avx")]
1262	#[cfg_attr(
1263	all(test, not(target_os = "windows")),
1264	assert_instr(vinsertf128, IMM1 = `1`)
1265	)]
1266	#[rustc_legacy_const_generics(`2`)]
1267	#[stable(feature = "simd_x86", since = "1.27.0")]
1268	pub unsafe fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 {
1269	static_assert_uimm_bits!(IMM1, `1`);
1270	simd_shuffle!(
1271	a,
1272	_mm256_castps128_ps256(b),
1273	[[`8`, `9`, `10`, `11`, `4`, `5`, `6`, `7`], [`0`, `1`, `2`, `3`, `8`, `9`, `10`, `11`]][IMM1 as usize],
1274	)
1275	}
1276
1277	/// Copies `a` to result, then inserts 128 bits (composed of 2 packed
1278	/// double-precision (64-bit) floating-point elements) from `b` into result
1279	/// at the location specified by `imm8`.
1280	///
1281	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf128_pd)
1282	#[inline]
1283	#[target_feature(enable = "avx")]
1284	#[cfg_attr(
1285	all(test, not(target_os = "windows")),
1286	assert_instr(vinsertf128, IMM1 = `1`)
1287	)]
1288	#[rustc_legacy_const_generics(`2`)]
1289	#[stable(feature = "simd_x86", since = "1.27.0")]
1290	pub unsafe fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d {
1291	static_assert_uimm_bits!(IMM1, `1`);
1292	simd_shuffle!(
1293	a,
1294	_mm256_castpd128_pd256(b),
1295	[[`4`, `5`, `2`, `3`], [`0`, `1`, `4`, `5`]][IMM1 as usize],
1296	)
1297	}
1298
1299	/// Copies `a` to result, then inserts 128 bits from `b` into result
1300	/// at the location specified by `imm8`.
1301	///
1302	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf128_si256)
1303	#[inline]
1304	#[target_feature(enable = "avx")]
1305	#[cfg_attr(
1306	all(test, not(target_os = "windows")),
1307	assert_instr(vinsertf128, IMM1 = `1`)
1308	)]
1309	#[rustc_legacy_const_generics(`2`)]
1310	#[stable(feature = "simd_x86", since = "1.27.0")]
1311	pub unsafe fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
1312	static_assert_uimm_bits!(IMM1, `1`);
1313	let dst: i64x4 = simd_shuffle!(
1314	a.as_i64x4(),
1315	_mm256_castsi128_si256(b).as_i64x4(),
1316	[[`4`, `5`, `2`, `3`], [`0`, `1`, `4`, `5`]][IMM1 as usize],
1317	);
1318	transmute(src:dst)
1319	}
1320
1321	/// Copies `a` to result, and inserts the 8-bit integer `i` into result
1322	/// at the location specified by `index`.
1323	///
1324	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insert_epi8)
1325	#[inline]
1326	#[target_feature(enable = "avx")]
1327	// This intrinsic has no corresponding instruction.
1328	#[rustc_legacy_const_generics(`2`)]
1329	#[stable(feature = "simd_x86", since = "1.27.0")]
1330	pub unsafe fn _mm256_insert_epi8<const INDEX: i32>(a: __m256i, i: i8) -> __m256i {
1331	static_assert_uimm_bits!(INDEX, `5`);
1332	transmute(src:simd_insert(x:a.as_i8x32(), INDEX as u32, val:i))
1333	}
1334
1335	/// Copies `a` to result, and inserts the 16-bit integer `i` into result
1336	/// at the location specified by `index`.
1337	///
1338	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insert_epi16)
1339	#[inline]
1340	#[target_feature(enable = "avx")]
1341	// This intrinsic has no corresponding instruction.
1342	#[rustc_legacy_const_generics(`2`)]
1343	#[stable(feature = "simd_x86", since = "1.27.0")]
1344	pub unsafe fn _mm256_insert_epi16<const INDEX: i32>(a: __m256i, i: i16) -> __m256i {
1345	static_assert_uimm_bits!(INDEX, `4`);
1346	transmute(src:simd_insert(x:a.as_i16x16(), INDEX as u32, val:i))
1347	}
1348
1349	/// Copies `a` to result, and inserts the 32-bit integer `i` into result
1350	/// at the location specified by `index`.
1351	///
1352	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insert_epi32)
1353	#[inline]
1354	#[target_feature(enable = "avx")]
1355	// This intrinsic has no corresponding instruction.
1356	#[rustc_legacy_const_generics(`2`)]
1357	#[stable(feature = "simd_x86", since = "1.27.0")]
1358	pub unsafe fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m256i {
1359	static_assert_uimm_bits!(INDEX, `3`);
1360	transmute(src:simd_insert(x:a.as_i32x8(), INDEX as u32, val:i))
1361	}
1362
1363	/// Loads 256-bits (composed of 4 packed double-precision (64-bit)
1364	/// floating-point elements) from memory into result.
1365	/// `mem_addr` must be aligned on a 32-byte boundary or a
1366	/// general-protection exception may be generated.
1367	///
1368	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_pd)
1369	#[inline]
1370	#[target_feature(enable = "avx")]
1371	#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected
1372	#[stable(feature = "simd_x86", since = "1.27.0")]
1373	#[allow(clippy::cast_ptr_alignment)]
1374	pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d {
1375	(mem_addr as const __m256d)
1376	}
1377
1378	/// Stores 256-bits (composed of 4 packed double-precision (64-bit)
1379	/// floating-point elements) from `a` into memory.
1380	/// `mem_addr` must be aligned on a 32-byte boundary or a
1381	/// general-protection exception may be generated.
1382	///
1383	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_pd)
1384	#[inline]
1385	#[target_feature(enable = "avx")]
1386	#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected
1387	#[stable(feature = "simd_x86", since = "1.27.0")]
1388	#[allow(clippy::cast_ptr_alignment)]
1389	pub unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d) {
1390	(mem_addr as mut __m256d) = a;
1391	}
1392
1393	/// Loads 256-bits (composed of 8 packed single-precision (32-bit)
1394	/// floating-point elements) from memory into result.
1395	/// `mem_addr` must be aligned on a 32-byte boundary or a
1396	/// general-protection exception may be generated.
1397	///
1398	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_ps)
1399	#[inline]
1400	#[target_feature(enable = "avx")]
1401	#[cfg_attr(test, assert_instr(vmovaps))]
1402	#[stable(feature = "simd_x86", since = "1.27.0")]
1403	#[allow(clippy::cast_ptr_alignment)]
1404	pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 {
1405	(mem_addr as const __m256)
1406	}
1407
1408	/// Stores 256-bits (composed of 8 packed single-precision (32-bit)
1409	/// floating-point elements) from `a` into memory.
1410	/// `mem_addr` must be aligned on a 32-byte boundary or a
1411	/// general-protection exception may be generated.
1412	///
1413	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_ps)
1414	#[inline]
1415	#[target_feature(enable = "avx")]
1416	#[cfg_attr(test, assert_instr(vmovaps))]
1417	#[stable(feature = "simd_x86", since = "1.27.0")]
1418	#[allow(clippy::cast_ptr_alignment)]
1419	pub unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256) {
1420	(mem_addr as mut __m256) = a;
1421	}
1422
1423	/// Loads 256-bits (composed of 4 packed double-precision (64-bit)
1424	/// floating-point elements) from memory into result.
1425	/// `mem_addr` does not need to be aligned on any particular boundary.
1426	///
1427	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_pd)
1428	#[inline]
1429	#[target_feature(enable = "avx")]
1430	#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected
1431	#[stable(feature = "simd_x86", since = "1.27.0")]
1432	pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d {
1433	let mut dst: __m256d = _mm256_undefined_pd();
1434	ptr::copy_nonoverlapping(
1435	src:mem_addr as *const u8,
1436	&mut dst as *mut __m256d as *mut u8,
1437	count:mem::size_of::<__m256d>(),
1438	);
1439	dst
1440	}
1441
1442	/// Stores 256-bits (composed of 4 packed double-precision (64-bit)
1443	/// floating-point elements) from `a` into memory.
1444	/// `mem_addr` does not need to be aligned on any particular boundary.
1445	///
1446	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_pd)
1447	#[inline]
1448	#[target_feature(enable = "avx")]
1449	#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected
1450	#[stable(feature = "simd_x86", since = "1.27.0")]
1451	pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) {
1452	mem_addr.cast::<__m256d>().write_unaligned(val:a);
1453	}
1454
1455	/// Loads 256-bits (composed of 8 packed single-precision (32-bit)
1456	/// floating-point elements) from memory into result.
1457	/// `mem_addr` does not need to be aligned on any particular boundary.
1458	///
1459	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_ps)
1460	#[inline]
1461	#[target_feature(enable = "avx")]
1462	#[cfg_attr(test, assert_instr(vmovups))]
1463	#[stable(feature = "simd_x86", since = "1.27.0")]
1464	pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 {
1465	let mut dst: __m256 = _mm256_undefined_ps();
1466	ptr::copy_nonoverlapping(
1467	src:mem_addr as *const u8,
1468	&mut dst as *mut __m256 as *mut u8,
1469	count:mem::size_of::<__m256>(),
1470	);
1471	dst
1472	}
1473
1474	/// Stores 256-bits (composed of 8 packed single-precision (32-bit)
1475	/// floating-point elements) from `a` into memory.
1476	/// `mem_addr` does not need to be aligned on any particular boundary.
1477	///
1478	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_ps)
1479	#[inline]
1480	#[target_feature(enable = "avx")]
1481	#[cfg_attr(test, assert_instr(vmovups))]
1482	#[stable(feature = "simd_x86", since = "1.27.0")]
1483	pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) {
1484	mem_addr.cast::<__m256>().write_unaligned(val:a);
1485	}
1486
1487	/// Loads 256-bits of integer data from memory into result.
1488	/// `mem_addr` must be aligned on a 32-byte boundary or a
1489	/// general-protection exception may be generated.
1490	///
1491	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_si256)
1492	#[inline]
1493	#[target_feature(enable = "avx")]
1494	#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected
1495	#[stable(feature = "simd_x86", since = "1.27.0")]
1496	pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i {
1497	*mem_addr
1498	}
1499
1500	/// Stores 256-bits of integer data from `a` into memory.
1501	/// `mem_addr` must be aligned on a 32-byte boundary or a
1502	/// general-protection exception may be generated.
1503	///
1504	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_si256)
1505	#[inline]
1506	#[target_feature(enable = "avx")]
1507	#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected
1508	#[stable(feature = "simd_x86", since = "1.27.0")]
1509	pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) {
1510	*mem_addr = a;
1511	}
1512
1513	/// Loads 256-bits of integer data from memory into result.
1514	/// `mem_addr` does not need to be aligned on any particular boundary.
1515	///
1516	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_si256)
1517	#[inline]
1518	#[target_feature(enable = "avx")]
1519	#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected
1520	#[stable(feature = "simd_x86", since = "1.27.0")]
1521	pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i {
1522	let mut dst: __m256i = _mm256_undefined_si256();
1523	ptr::copy_nonoverlapping(
1524	src:mem_addr as *const u8,
1525	&mut dst as *mut __m256i as *mut u8,
1526	count:mem::size_of::<__m256i>(),
1527	);
1528	dst
1529	}
1530
1531	/// Stores 256-bits of integer data from `a` into memory.
1532	/// `mem_addr` does not need to be aligned on any particular boundary.
1533	///
1534	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_si256)
1535	#[inline]
1536	#[target_feature(enable = "avx")]
1537	#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected
1538	#[stable(feature = "simd_x86", since = "1.27.0")]
1539	pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
1540	mem_addr.write_unaligned(val:a);
1541	}
1542
1543	/// Loads packed double-precision (64-bit) floating-point elements from memory
1544	/// into result using `mask` (elements are zeroed out when the high bit of the
1545	/// corresponding element is not set).
1546	///
1547	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_pd)
1548	#[inline]
1549	#[target_feature(enable = "avx")]
1550	#[cfg_attr(test, assert_instr(vmaskmovpd))]
1551	#[stable(feature = "simd_x86", since = "1.27.0")]
1552	pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d {
1553	maskloadpd256(mem_addr as *const i8, mask:mask.as_i64x4())
1554	}
1555
1556	/// Stores packed double-precision (64-bit) floating-point elements from `a`
1557	/// into memory using `mask`.
1558	///
1559	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_pd)
1560	#[inline]
1561	#[target_feature(enable = "avx")]
1562	#[cfg_attr(test, assert_instr(vmaskmovpd))]
1563	#[stable(feature = "simd_x86", since = "1.27.0")]
1564	pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) {
1565	maskstorepd256(mem_addr as *mut i8, mask:mask.as_i64x4(), a);
1566	}
1567
1568	/// Loads packed double-precision (64-bit) floating-point elements from memory
1569	/// into result using `mask` (elements are zeroed out when the high bit of the
1570	/// corresponding element is not set).
1571	///
1572	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_pd)
1573	#[inline]
1574	#[target_feature(enable = "avx")]
1575	#[cfg_attr(test, assert_instr(vmaskmovpd))]
1576	#[stable(feature = "simd_x86", since = "1.27.0")]
1577	pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
1578	maskloadpd(mem_addr as *const i8, mask:mask.as_i64x2())
1579	}
1580
1581	/// Stores packed double-precision (64-bit) floating-point elements from `a`
1582	/// into memory using `mask`.
1583	///
1584	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_pd)
1585	#[inline]
1586	#[target_feature(enable = "avx")]
1587	#[cfg_attr(test, assert_instr(vmaskmovpd))]
1588	#[stable(feature = "simd_x86", since = "1.27.0")]
1589	pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
1590	maskstorepd(mem_addr as *mut i8, mask:mask.as_i64x2(), a);
1591	}
1592
1593	/// Loads packed single-precision (32-bit) floating-point elements from memory
1594	/// into result using `mask` (elements are zeroed out when the high bit of the
1595	/// corresponding element is not set).
1596	///
1597	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_ps)
1598	#[inline]
1599	#[target_feature(enable = "avx")]
1600	#[cfg_attr(test, assert_instr(vmaskmovps))]
1601	#[stable(feature = "simd_x86", since = "1.27.0")]
1602	pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 {
1603	maskloadps256(mem_addr as *const i8, mask:mask.as_i32x8())
1604	}
1605
1606	/// Stores packed single-precision (32-bit) floating-point elements from `a`
1607	/// into memory using `mask`.
1608	///
1609	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_ps)
1610	#[inline]
1611	#[target_feature(enable = "avx")]
1612	#[cfg_attr(test, assert_instr(vmaskmovps))]
1613	#[stable(feature = "simd_x86", since = "1.27.0")]
1614	pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) {
1615	maskstoreps256(mem_addr as *mut i8, mask:mask.as_i32x8(), a);
1616	}
1617
1618	/// Loads packed single-precision (32-bit) floating-point elements from memory
1619	/// into result using `mask` (elements are zeroed out when the high bit of the
1620	/// corresponding element is not set).
1621	///
1622	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_ps)
1623	#[inline]
1624	#[target_feature(enable = "avx")]
1625	#[cfg_attr(test, assert_instr(vmaskmovps))]
1626	#[stable(feature = "simd_x86", since = "1.27.0")]
1627	pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
1628	maskloadps(mem_addr as *const i8, mask:mask.as_i32x4())
1629	}
1630
1631	/// Stores packed single-precision (32-bit) floating-point elements from `a`
1632	/// into memory using `mask`.
1633	///
1634	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_ps)
1635	#[inline]
1636	#[target_feature(enable = "avx")]
1637	#[cfg_attr(test, assert_instr(vmaskmovps))]
1638	#[stable(feature = "simd_x86", since = "1.27.0")]
1639	pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
1640	maskstoreps(mem_addr as *mut i8, mask:mask.as_i32x4(), a);
1641	}
1642
1643	/// Duplicate odd-indexed single-precision (32-bit) floating-point elements
1644	/// from `a`, and returns the results.
1645	///
1646	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movehdup_ps)
1647	#[inline]
1648	#[target_feature(enable = "avx")]
1649	#[cfg_attr(test, assert_instr(vmovshdup))]
1650	#[stable(feature = "simd_x86", since = "1.27.0")]
1651	pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 {
1652	simd_shuffle!(a, a, [`1`, `1`, `3`, `3`, `5`, `5`, `7`, `7`])
1653	}
1654
1655	/// Duplicate even-indexed single-precision (32-bit) floating-point elements
1656	/// from `a`, and returns the results.
1657	///
1658	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_moveldup_ps)
1659	#[inline]
1660	#[target_feature(enable = "avx")]
1661	#[cfg_attr(test, assert_instr(vmovsldup))]
1662	#[stable(feature = "simd_x86", since = "1.27.0")]
1663	pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 {
1664	simd_shuffle!(a, a, [`0`, `0`, `2`, `2`, `4`, `4`, `6`, `6`])
1665	}
1666
1667	/// Duplicate even-indexed double-precision (64-bit) floating-point elements
1668	/// from `a`, and returns the results.
1669	///
1670	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movedup_pd)
1671	#[inline]
1672	#[target_feature(enable = "avx")]
1673	#[cfg_attr(test, assert_instr(vmovddup))]
1674	#[stable(feature = "simd_x86", since = "1.27.0")]
1675	pub unsafe fn _mm256_movedup_pd(a: __m256d) -> __m256d {
1676	simd_shuffle!(a, a, [`0`, `0`, `2`, `2`])
1677	}
1678
1679	/// Loads 256-bits of integer data from unaligned memory into result.
1680	/// This intrinsic may perform better than `_mm256_loadu_si256` when the
1681	/// data crosses a cache line boundary.
1682	///
1683	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_lddqu_si256)
1684	#[inline]
1685	#[target_feature(enable = "avx")]
1686	#[cfg_attr(test, assert_instr(vlddqu))]
1687	#[stable(feature = "simd_x86", since = "1.27.0")]
1688	pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
1689	transmute(src:vlddqu(mem_addr as *const i8))
1690	}
1691
1692	/// Moves integer data from a 256-bit integer vector to a 32-byte
1693	/// aligned memory location. To minimize caching, the data is flagged as
1694	/// non-temporal (unlikely to be used again soon)
1695	///
1696	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_si256)
1697	#[inline]
1698	#[target_feature(enable = "avx")]
1699	#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntdq
1700	#[stable(feature = "simd_x86", since = "1.27.0")]
1701	pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) {
1702	intrinsics::nontemporal_store(ptr:mem_addr, val:a);
1703	}
1704
1705	/// Moves double-precision values from a 256-bit vector of `[4 x double]`
1706	/// to a 32-byte aligned memory location. To minimize caching, the data is
1707	/// flagged as non-temporal (unlikely to be used again soon).
1708	///
1709	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_pd)
1710	#[inline]
1711	#[target_feature(enable = "avx")]
1712	#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntpd
1713	#[stable(feature = "simd_x86", since = "1.27.0")]
1714	#[allow(clippy::cast_ptr_alignment)]
1715	pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) {
1716	intrinsics::nontemporal_store(ptr:mem_addr as *mut __m256d, val:a);
1717	}
1718
1719	/// Moves single-precision floating point values from a 256-bit vector
1720	/// of `[8 x float]` to a 32-byte aligned memory location. To minimize
1721	/// caching, the data is flagged as non-temporal (unlikely to be used again
1722	/// soon).
1723	///
1724	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_ps)
1725	#[inline]
1726	#[target_feature(enable = "avx")]
1727	#[cfg_attr(test, assert_instr(vmovntps))]
1728	#[stable(feature = "simd_x86", since = "1.27.0")]
1729	#[allow(clippy::cast_ptr_alignment)]
1730	pub unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256) {
1731	intrinsics::nontemporal_store(ptr:mem_addr as *mut __m256, val:a);
1732	}
1733
1734	/// Computes the approximate reciprocal of packed single-precision (32-bit)
1735	/// floating-point elements in `a`, and returns the results. The maximum
1736	/// relative error for this approximation is less than 1.52^-12.*
1737	///
1738	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp_ps)
1739	#[inline]
1740	#[target_feature(enable = "avx")]
1741	#[cfg_attr(test, assert_instr(vrcpps))]
1742	#[stable(feature = "simd_x86", since = "1.27.0")]
1743	pub unsafe fn _mm256_rcp_ps(a: __m256) -> __m256 {
1744	vrcpps(a)
1745	}
1746
1747	/// Computes the approximate reciprocal square root of packed single-precision
1748	/// (32-bit) floating-point elements in `a`, and returns the results.
1749	/// The maximum relative error for this approximation is less than 1.52^-12.*
1750	///
1751	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt_ps)
1752	#[inline]
1753	#[target_feature(enable = "avx")]
1754	#[cfg_attr(test, assert_instr(vrsqrtps))]
1755	#[stable(feature = "simd_x86", since = "1.27.0")]
1756	pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 {
1757	vrsqrtps(a)
1758	}
1759
1760	/// Unpacks and interleave double-precision (64-bit) floating-point elements
1761	/// from the high half of each 128-bit lane in `a` and `b`.
1762	///
1763	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_pd)
1764	#[inline]
1765	#[target_feature(enable = "avx")]
1766	#[cfg_attr(test, assert_instr(vunpckhpd))]
1767	#[stable(feature = "simd_x86", since = "1.27.0")]
1768	pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
1769	simd_shuffle!(a, b, [`1`, `5`, `3`, `7`])
1770	}
1771
1772	/// Unpacks and interleave single-precision (32-bit) floating-point elements
1773	/// from the high half of each 128-bit lane in `a` and `b`.
1774	///
1775	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_ps)
1776	#[inline]
1777	#[target_feature(enable = "avx")]
1778	#[cfg_attr(test, assert_instr(vunpckhps))]
1779	#[stable(feature = "simd_x86", since = "1.27.0")]
1780	pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
1781	simd_shuffle!(a, b, [`2`, `10`, `3`, `11`, `6`, `14`, `7`, `15`])
1782	}
1783
1784	/// Unpacks and interleave double-precision (64-bit) floating-point elements
1785	/// from the low half of each 128-bit lane in `a` and `b`.
1786	///
1787	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_pd)
1788	#[inline]
1789	#[target_feature(enable = "avx")]
1790	#[cfg_attr(test, assert_instr(vunpcklpd))]
1791	#[stable(feature = "simd_x86", since = "1.27.0")]
1792	pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
1793	simd_shuffle!(a, b, [`0`, `4`, `2`, `6`])
1794	}
1795
1796	/// Unpacks and interleave single-precision (32-bit) floating-point elements
1797	/// from the low half of each 128-bit lane in `a` and `b`.
1798	///
1799	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_ps)
1800	#[inline]
1801	#[target_feature(enable = "avx")]
1802	#[cfg_attr(test, assert_instr(vunpcklps))]
1803	#[stable(feature = "simd_x86", since = "1.27.0")]
1804	pub unsafe fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
1805	simd_shuffle!(a, b, [`0`, `8`, `1`, `9`, `4`, `12`, `5`, `13`])
1806	}
1807
1808	/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
1809	/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0.
1810	/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
1811	/// the result is zero, otherwise set `CF` to 0. Return the `ZF` value.
1812	///
1813	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_si256)
1814	#[inline]
1815	#[target_feature(enable = "avx")]
1816	#[cfg_attr(test, assert_instr(vptest))]
1817	#[stable(feature = "simd_x86", since = "1.27.0")]
1818	pub unsafe fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 {
1819	ptestz256(a:a.as_i64x4(), b:b.as_i64x4())
1820	}
1821
1822	/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
1823	/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0.
1824	/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
1825	/// the result is zero, otherwise set `CF` to 0. Return the `CF` value.
1826	///
1827	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testc_si256)
1828	#[inline]
1829	#[target_feature(enable = "avx")]
1830	#[cfg_attr(test, assert_instr(vptest))]
1831	#[stable(feature = "simd_x86", since = "1.27.0")]
1832	pub unsafe fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 {
1833	ptestc256(a:a.as_i64x4(), b:b.as_i64x4())
1834	}
1835
1836	/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
1837	/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0.
1838	/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
1839	/// the result is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and
1840	/// `CF` values are zero, otherwise return 0.
1841	///
1842	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_si256)
1843	#[inline]
1844	#[target_feature(enable = "avx")]
1845	#[cfg_attr(test, assert_instr(vptest))]
1846	#[stable(feature = "simd_x86", since = "1.27.0")]
1847	pub unsafe fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 {
1848	ptestnzc256(a:a.as_i64x4(), b:b.as_i64x4())
1849	}
1850
1851	/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit)
1852	/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
1853	/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1854	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1855	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1856	/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1857	/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
1858	///
1859	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_pd)
1860	#[inline]
1861	#[target_feature(enable = "avx")]
1862	#[cfg_attr(test, assert_instr(vtestpd))]
1863	#[stable(feature = "simd_x86", since = "1.27.0")]
1864	pub unsafe fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 {
1865	vtestzpd256(a, b)
1866	}
1867
1868	/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit)
1869	/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
1870	/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1871	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1872	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1873	/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1874	/// is zero, otherwise set `CF` to 0. Return the `CF` value.
1875	///
1876	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testc_pd)
1877	#[inline]
1878	#[target_feature(enable = "avx")]
1879	#[cfg_attr(test, assert_instr(vtestpd))]
1880	#[stable(feature = "simd_x86", since = "1.27.0")]
1881	pub unsafe fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 {
1882	vtestcpd256(a, b)
1883	}
1884
1885	/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit)
1886	/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
1887	/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1888	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1889	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1890	/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1891	/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
1892	/// are zero, otherwise return 0.
1893	///
1894	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_pd)
1895	#[inline]
1896	#[target_feature(enable = "avx")]
1897	#[cfg_attr(test, assert_instr(vtestpd))]
1898	#[stable(feature = "simd_x86", since = "1.27.0")]
1899	pub unsafe fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 {
1900	vtestnzcpd256(a, b)
1901	}
1902
1903	/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit)
1904	/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
1905	/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1906	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1907	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1908	/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1909	/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
1910	///
1911	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testz_pd)
1912	#[inline]
1913	#[target_feature(enable = "avx")]
1914	#[cfg_attr(test, assert_instr(vtestpd))]
1915	#[stable(feature = "simd_x86", since = "1.27.0")]
1916	pub unsafe fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 {
1917	vtestzpd(a, b)
1918	}
1919
1920	/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit)
1921	/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
1922	/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1923	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1924	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1925	/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1926	/// is zero, otherwise set `CF` to 0. Return the `CF` value.
1927	///
1928	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testc_pd)
1929	#[inline]
1930	#[target_feature(enable = "avx")]
1931	#[cfg_attr(test, assert_instr(vtestpd))]
1932	#[stable(feature = "simd_x86", since = "1.27.0")]
1933	pub unsafe fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 {
1934	vtestcpd(a, b)
1935	}
1936
1937	/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit)
1938	/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
1939	/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1940	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1941	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1942	/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1943	/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
1944	/// are zero, otherwise return 0.
1945	///
1946	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_pd)
1947	#[inline]
1948	#[target_feature(enable = "avx")]
1949	#[cfg_attr(test, assert_instr(vtestpd))]
1950	#[stable(feature = "simd_x86", since = "1.27.0")]
1951	pub unsafe fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 {
1952	vtestnzcpd(a, b)
1953	}
1954
1955	/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit)
1956	/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
1957	/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
1958	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1959	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1960	/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
1961	/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
1962	///
1963	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_ps)
1964	#[inline]
1965	#[target_feature(enable = "avx")]
1966	#[cfg_attr(test, assert_instr(vtestps))]
1967	#[stable(feature = "simd_x86", since = "1.27.0")]
1968	pub unsafe fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 {
1969	vtestzps256(a, b)
1970	}
1971
1972	/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit)
1973	/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
1974	/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
1975	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1976	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1977	/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
1978	/// is zero, otherwise set `CF` to 0. Return the `CF` value.
1979	///
1980	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testc_ps)
1981	#[inline]
1982	#[target_feature(enable = "avx")]
1983	#[cfg_attr(test, assert_instr(vtestps))]
1984	#[stable(feature = "simd_x86", since = "1.27.0")]
1985	pub unsafe fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 {
1986	vtestcps256(a, b)
1987	}
1988
1989	/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit)
1990	/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
1991	/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
1992	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1993	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1994	/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
1995	/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
1996	/// are zero, otherwise return 0.
1997	///
1998	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_ps)
1999	#[inline]
2000	#[target_feature(enable = "avx")]
2001	#[cfg_attr(test, assert_instr(vtestps))]
2002	#[stable(feature = "simd_x86", since = "1.27.0")]
2003	pub unsafe fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 {
2004	vtestnzcps256(a, b)
2005	}
2006
2007	/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit)
2008	/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
2009	/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2010	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2011	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2012	/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2013	/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
2014	///
2015	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testz_ps)
2016	#[inline]
2017	#[target_feature(enable = "avx")]
2018	#[cfg_attr(test, assert_instr(vtestps))]
2019	#[stable(feature = "simd_x86", since = "1.27.0")]
2020	pub unsafe fn _mm_testz_ps(a: __m128, b: __m128) -> i32 {
2021	vtestzps(a, b)
2022	}
2023
2024	/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit)
2025	/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
2026	/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2027	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2028	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2029	/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2030	/// is zero, otherwise set `CF` to 0. Return the `CF` value.
2031	///
2032	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testc_ps)
2033	#[inline]
2034	#[target_feature(enable = "avx")]
2035	#[cfg_attr(test, assert_instr(vtestps))]
2036	#[stable(feature = "simd_x86", since = "1.27.0")]
2037	pub unsafe fn _mm_testc_ps(a: __m128, b: __m128) -> i32 {
2038	vtestcps(a, b)
2039	}
2040
2041	/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit)
2042	/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
2043	/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2044	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2045	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2046	/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2047	/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
2048	/// are zero, otherwise return 0.
2049	///
2050	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_ps)
2051	#[inline]
2052	#[target_feature(enable = "avx")]
2053	#[cfg_attr(test, assert_instr(vtestps))]
2054	#[stable(feature = "simd_x86", since = "1.27.0")]
2055	pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 {
2056	vtestnzcps(a, b)
2057	}
2058
2059	/// Sets each bit of the returned mask based on the most significant bit of the
2060	/// corresponding packed double-precision (64-bit) floating-point element in
2061	/// `a`.
2062	///
2063	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_pd)
2064	#[inline]
2065	#[target_feature(enable = "avx")]
2066	#[cfg_attr(test, assert_instr(vmovmskpd))]
2067	#[stable(feature = "simd_x86", since = "1.27.0")]
2068	pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {
2069	// Propagate the highest bit to the rest, because simd_bitmask
2070	// requires all-1 or all-0.
2071	let mask: i64x4 = simd_lt(x:transmute(a), y:i64x4::splat(`0`));
2072	simd_bitmask::<i64x4, u8>(mask).into()
2073	}
2074
2075	/// Sets each bit of the returned mask based on the most significant bit of the
2076	/// corresponding packed single-precision (32-bit) floating-point element in
2077	/// `a`.
2078	///
2079	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_ps)
2080	#[inline]
2081	#[target_feature(enable = "avx")]
2082	#[cfg_attr(test, assert_instr(vmovmskps))]
2083	#[stable(feature = "simd_x86", since = "1.27.0")]
2084	pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {
2085	// Propagate the highest bit to the rest, because simd_bitmask
2086	// requires all-1 or all-0.
2087	let mask: i32x8 = simd_lt(x:transmute(a), y:i32x8::splat(`0`));
2088	simd_bitmask::<i32x8, u8>(mask).into()
2089	}
2090
2091	/// Returns vector of type __m256d with all elements set to zero.
2092	///
2093	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_pd)
2094	#[inline]
2095	#[target_feature(enable = "avx")]
2096	#[cfg_attr(test, assert_instr(vxorps))] // FIXME vxorpd expected
2097	#[stable(feature = "simd_x86", since = "1.27.0")]
2098	pub unsafe fn _mm256_setzero_pd() -> __m256d {
2099	_mm256_set1_pd(`0.0`)
2100	}
2101
2102	/// Returns vector of type __m256 with all elements set to zero.
2103	///
2104	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_ps)
2105	#[inline]
2106	#[target_feature(enable = "avx")]
2107	#[cfg_attr(test, assert_instr(vxorps))]
2108	#[stable(feature = "simd_x86", since = "1.27.0")]
2109	pub unsafe fn _mm256_setzero_ps() -> __m256 {
2110	_mm256_set1_ps(`0.0`)
2111	}
2112
2113	/// Returns vector of type __m256i with all elements set to zero.
2114	///
2115	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_si256)
2116	#[inline]
2117	#[target_feature(enable = "avx")]
2118	#[cfg_attr(test, assert_instr(vxor))]
2119	#[stable(feature = "simd_x86", since = "1.27.0")]
2120	pub unsafe fn _mm256_setzero_si256() -> __m256i {
2121	_mm256_set1_epi8(`0`)
2122	}
2123
2124	/// Sets packed double-precision (64-bit) floating-point elements in returned
2125	/// vector with the supplied values.
2126	///
2127	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_pd)
2128	#[inline]
2129	#[target_feature(enable = "avx")]
2130	// This intrinsic has no corresponding instruction.
2131	#[cfg_attr(test, assert_instr(vinsertf128))]
2132	#[stable(feature = "simd_x86", since = "1.27.0")]
2133	pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
2134	_mm256_setr_pd(a:d, b:c, c:b, d:a)
2135	}
2136
2137	/// Sets packed single-precision (32-bit) floating-point elements in returned
2138	/// vector with the supplied values.
2139	///
2140	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_ps)
2141	#[inline]
2142	#[target_feature(enable = "avx")]
2143	// This intrinsic has no corresponding instruction.
2144	#[stable(feature = "simd_x86", since = "1.27.0")]
2145	pub unsafe fn _mm256_set_ps(
2146	a: f32,
2147	b: f32,
2148	c: f32,
2149	d: f32,
2150	e: f32,
2151	f: f32,
2152	g: f32,
2153	h: f32,
2154	) -> __m256 {
2155	_mm256_setr_ps(a:h, b:g, c:f, d:e, e:d, f:c, g:b, h:a)
2156	}
2157
2158	/// Sets packed 8-bit integers in returned vector with the supplied values.
2159	///
2160	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi8)
2161	#[inline]
2162	#[target_feature(enable = "avx")]
2163	// This intrinsic has no corresponding instruction.
2164	#[stable(feature = "simd_x86", since = "1.27.0")]
2165	pub unsafe fn _mm256_set_epi8(
2166	e00: i8,
2167	e01: i8,
2168	e02: i8,
2169	e03: i8,
2170	e04: i8,
2171	e05: i8,
2172	e06: i8,
2173	e07: i8,
2174	e08: i8,
2175	e09: i8,
2176	e10: i8,
2177	e11: i8,
2178	e12: i8,
2179	e13: i8,
2180	e14: i8,
2181	e15: i8,
2182	e16: i8,
2183	e17: i8,
2184	e18: i8,
2185	e19: i8,
2186	e20: i8,
2187	e21: i8,
2188	e22: i8,
2189	e23: i8,
2190	e24: i8,
2191	e25: i8,
2192	e26: i8,
2193	e27: i8,
2194	e28: i8,
2195	e29: i8,
2196	e30: i8,
2197	e31: i8,
2198	) -> __m256i {
2199	#[rustfmt::skip]
2200	_mm256_setr_epi8(
2201	e00:e31, e01:e30, e02:e29, e03:e28, e04:e27, e05:e26, e06:e25, e07:e24,
2202	e08:e23, e09:e22, e10:e21, e11:e20, e12:e19, e13:e18, e14:e17, e15:e16,
2203	e16:e15, e17:e14, e18:e13, e19:e12, e20:e11, e21:e10, e22:e09, e23:e08,
2204	e24:e07, e25:e06, e26:e05, e27:e04, e28:e03, e29:e02, e30:e01, e31:e00,
2205	)
2206	}
2207
2208	/// Sets packed 16-bit integers in returned vector with the supplied values.
2209	///
2210	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi16)
2211	#[inline]
2212	#[target_feature(enable = "avx")]
2213	// This intrinsic has no corresponding instruction.
2214	#[stable(feature = "simd_x86", since = "1.27.0")]
2215	pub unsafe fn _mm256_set_epi16(
2216	e00: i16,
2217	e01: i16,
2218	e02: i16,
2219	e03: i16,
2220	e04: i16,
2221	e05: i16,
2222	e06: i16,
2223	e07: i16,
2224	e08: i16,
2225	e09: i16,
2226	e10: i16,
2227	e11: i16,
2228	e12: i16,
2229	e13: i16,
2230	e14: i16,
2231	e15: i16,
2232	) -> __m256i {
2233	#[rustfmt::skip]
2234	_mm256_setr_epi16(
2235	e00:e15, e01:e14, e02:e13, e03:e12,
2236	e04:e11, e05:e10, e06:e09, e07:e08,
2237	e08:e07, e09:e06, e10:e05, e11:e04,
2238	e12:e03, e13:e02, e14:e01, e15:e00,
2239	)
2240	}
2241
2242	/// Sets packed 32-bit integers in returned vector with the supplied values.
2243	///
2244	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi32)
2245	#[inline]
2246	#[target_feature(enable = "avx")]
2247	// This intrinsic has no corresponding instruction.
2248	#[stable(feature = "simd_x86", since = "1.27.0")]
2249	pub unsafe fn _mm256_set_epi32(
2250	e0: i32,
2251	e1: i32,
2252	e2: i32,
2253	e3: i32,
2254	e4: i32,
2255	e5: i32,
2256	e6: i32,
2257	e7: i32,
2258	) -> __m256i {
2259	_mm256_setr_epi32(e0:e7, e1:e6, e2:e5, e3:e4, e4:e3, e5:e2, e6:e1, e7:e0)
2260	}
2261
2262	/// Sets packed 64-bit integers in returned vector with the supplied values.
2263	///
2264	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi64x)
2265	#[inline]
2266	#[target_feature(enable = "avx")]
2267	// This intrinsic has no corresponding instruction.
2268	#[stable(feature = "simd_x86", since = "1.27.0")]
2269	pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
2270	_mm256_setr_epi64x(a:d, b:c, c:b, d:a)
2271	}
2272
2273	/// Sets packed double-precision (64-bit) floating-point elements in returned
2274	/// vector with the supplied values in reverse order.
2275	///
2276	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_pd)
2277	#[inline]
2278	#[target_feature(enable = "avx")]
2279	// This intrinsic has no corresponding instruction.
2280	#[stable(feature = "simd_x86", since = "1.27.0")]
2281	pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
2282	__m256d(a, b, c, d)
2283	}
2284
2285	/// Sets packed single-precision (32-bit) floating-point elements in returned
2286	/// vector with the supplied values in reverse order.
2287	///
2288	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_ps)
2289	#[inline]
2290	#[target_feature(enable = "avx")]
2291	// This intrinsic has no corresponding instruction.
2292	#[stable(feature = "simd_x86", since = "1.27.0")]
2293	pub unsafe fn _mm256_setr_ps(
2294	a: f32,
2295	b: f32,
2296	c: f32,
2297	d: f32,
2298	e: f32,
2299	f: f32,
2300	g: f32,
2301	h: f32,
2302	) -> __m256 {
2303	__m256(a, b, c, d, e, f, g, h)
2304	}
2305
2306	/// Sets packed 8-bit integers in returned vector with the supplied values in
2307	/// reverse order.
2308	///
2309	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi8)
2310	#[inline]
2311	#[target_feature(enable = "avx")]
2312	// This intrinsic has no corresponding instruction.
2313	#[stable(feature = "simd_x86", since = "1.27.0")]
2314	pub unsafe fn _mm256_setr_epi8(
2315	e00: i8,
2316	e01: i8,
2317	e02: i8,
2318	e03: i8,
2319	e04: i8,
2320	e05: i8,
2321	e06: i8,
2322	e07: i8,
2323	e08: i8,
2324	e09: i8,
2325	e10: i8,
2326	e11: i8,
2327	e12: i8,
2328	e13: i8,
2329	e14: i8,
2330	e15: i8,
2331	e16: i8,
2332	e17: i8,
2333	e18: i8,
2334	e19: i8,
2335	e20: i8,
2336	e21: i8,
2337	e22: i8,
2338	e23: i8,
2339	e24: i8,
2340	e25: i8,
2341	e26: i8,
2342	e27: i8,
2343	e28: i8,
2344	e29: i8,
2345	e30: i8,
2346	e31: i8,
2347	) -> __m256i {
2348	#[rustfmt::skip]
2349	transmute(src:i8x32::new(
2350	x0:e00, x1:e01, x2:e02, x3:e03, x4:e04, x5:e05, x6:e06, x7:e07,
2351	x8:e08, x9:e09, x10:e10, x11:e11, x12:e12, x13:e13, x14:e14, x15:e15,
2352	x16:e16, x17:e17, x18:e18, x19:e19, x20:e20, x21:e21, x22:e22, x23:e23,
2353	x24:e24, x25:e25, x26:e26, x27:e27, x28:e28, x29:e29, x30:e30, x31:e31,
2354	))
2355	}
2356
2357	/// Sets packed 16-bit integers in returned vector with the supplied values in
2358	/// reverse order.
2359	///
2360	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi16)
2361	#[inline]
2362	#[target_feature(enable = "avx")]
2363	// This intrinsic has no corresponding instruction.
2364	#[stable(feature = "simd_x86", since = "1.27.0")]
2365	pub unsafe fn _mm256_setr_epi16(
2366	e00: i16,
2367	e01: i16,
2368	e02: i16,
2369	e03: i16,
2370	e04: i16,
2371	e05: i16,
2372	e06: i16,
2373	e07: i16,
2374	e08: i16,
2375	e09: i16,
2376	e10: i16,
2377	e11: i16,
2378	e12: i16,
2379	e13: i16,
2380	e14: i16,
2381	e15: i16,
2382	) -> __m256i {
2383	#[rustfmt::skip]
2384	transmute(src:i16x16::new(
2385	x0:e00, x1:e01, x2:e02, x3:e03,
2386	x4:e04, x5:e05, x6:e06, x7:e07,
2387	x8:e08, x9:e09, x10:e10, x11:e11,
2388	x12:e12, x13:e13, x14:e14, x15:e15,
2389	))
2390	}
2391
2392	/// Sets packed 32-bit integers in returned vector with the supplied values in
2393	/// reverse order.
2394	///
2395	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi32)
2396	#[inline]
2397	#[target_feature(enable = "avx")]
2398	// This intrinsic has no corresponding instruction.
2399	#[stable(feature = "simd_x86", since = "1.27.0")]
2400	pub unsafe fn _mm256_setr_epi32(
2401	e0: i32,
2402	e1: i32,
2403	e2: i32,
2404	e3: i32,
2405	e4: i32,
2406	e5: i32,
2407	e6: i32,
2408	e7: i32,
2409	) -> __m256i {
2410	transmute(src:i32x8::new(x0:e0, x1:e1, x2:e2, x3:e3, x4:e4, x5:e5, x6:e6, x7:e7))
2411	}
2412
2413	/// Sets packed 64-bit integers in returned vector with the supplied values in
2414	/// reverse order.
2415	///
2416	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi64x)
2417	#[inline]
2418	#[target_feature(enable = "avx")]
2419	// This intrinsic has no corresponding instruction.
2420	#[stable(feature = "simd_x86", since = "1.27.0")]
2421	pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
2422	transmute(src:i64x4::new(x0:a, x1:b, x2:c, x3:d))
2423	}
2424
2425	/// Broadcasts double-precision (64-bit) floating-point value `a` to all
2426	/// elements of returned vector.
2427	///
2428	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_pd)
2429	#[inline]
2430	#[target_feature(enable = "avx")]
2431	// This intrinsic has no corresponding instruction.
2432	#[stable(feature = "simd_x86", since = "1.27.0")]
2433	pub unsafe fn _mm256_set1_pd(a: f64) -> __m256d {
2434	_mm256_setr_pd(a, b:a, c:a, d:a)
2435	}
2436
2437	/// Broadcasts single-precision (32-bit) floating-point value `a` to all
2438	/// elements of returned vector.
2439	///
2440	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_ps)
2441	#[inline]
2442	#[target_feature(enable = "avx")]
2443	// This intrinsic has no corresponding instruction.
2444	#[stable(feature = "simd_x86", since = "1.27.0")]
2445	pub unsafe fn _mm256_set1_ps(a: f32) -> __m256 {
2446	_mm256_setr_ps(a, b:a, c:a, d:a, e:a, f:a, g:a, h:a)
2447	}
2448
2449	/// Broadcasts 8-bit integer `a` to all elements of returned vector.
2450	/// This intrinsic may generate the `vpbroadcastb`.
2451	///
2452	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi8)
2453	#[inline]
2454	#[target_feature(enable = "avx")]
2455	// This intrinsic has no corresponding instruction.
2456	#[stable(feature = "simd_x86", since = "1.27.0")]
2457	pub unsafe fn _mm256_set1_epi8(a: i8) -> __m256i {
2458	#[rustfmt::skip]
2459	_mm256_setr_epi8(
2460	e00:a, e01:a, e02:a, e03:a, e04:a, e05:a, e06:a, e07:a,
2461	e08:a, e09:a, e10:a, e11:a, e12:a, e13:a, e14:a, e15:a,
2462	e16:a, e17:a, e18:a, e19:a, e20:a, e21:a, e22:a, e23:a,
2463	e24:a, e25:a, e26:a, e27:a, e28:a, e29:a, e30:a, e31:a,
2464	)
2465	}
2466
2467	/// Broadcasts 16-bit integer `a` to all elements of returned vector.
2468	/// This intrinsic may generate the `vpbroadcastw`.
2469	///
2470	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi16)
2471	#[inline]
2472	#[target_feature(enable = "avx")]
2473	//#[cfg_attr(test, assert_instr(vpshufb))]
2474	#[cfg_attr(test, assert_instr(vinsertf128))]
2475	// This intrinsic has no corresponding instruction.
2476	#[stable(feature = "simd_x86", since = "1.27.0")]
2477	pub unsafe fn _mm256_set1_epi16(a: i16) -> __m256i {
2478	_mm256_setr_epi16(e00:a, e01:a, e02:a, e03:a, e04:a, e05:a, e06:a, e07:a, e08:a, e09:a, e10:a, e11:a, e12:a, e13:a, e14:a, e15:a)
2479	}
2480
2481	/// Broadcasts 32-bit integer `a` to all elements of returned vector.
2482	/// This intrinsic may generate the `vpbroadcastd`.
2483	///
2484	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi32)
2485	#[inline]
2486	#[target_feature(enable = "avx")]
2487	// This intrinsic has no corresponding instruction.
2488	#[stable(feature = "simd_x86", since = "1.27.0")]
2489	pub unsafe fn _mm256_set1_epi32(a: i32) -> __m256i {
2490	_mm256_setr_epi32(e0:a, e1:a, e2:a, e3:a, e4:a, e5:a, e6:a, e7:a)
2491	}
2492
2493	/// Broadcasts 64-bit integer `a` to all elements of returned vector.
2494	/// This intrinsic may generate the `vpbroadcastq`.
2495	///
2496	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi64x)
2497	#[inline]
2498	#[target_feature(enable = "avx")]
2499	#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(vinsertf128))]
2500	#[cfg_attr(all(test, target_arch = "x86"), assert_instr(vbroadcastsd))]
2501	// This intrinsic has no corresponding instruction.
2502	#[stable(feature = "simd_x86", since = "1.27.0")]
2503	pub unsafe fn _mm256_set1_epi64x(a: i64) -> __m256i {
2504	_mm256_setr_epi64x(a, b:a, c:a, d:a)
2505	}
2506
2507	/// Cast vector of type __m256d to type __m256.
2508	///
2509	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd_ps)
2510	#[inline]
2511	#[target_feature(enable = "avx")]
2512	// This intrinsic is only used for compilation and does not generate any
2513	// instructions, thus it has zero latency.
2514	#[stable(feature = "simd_x86", since = "1.27.0")]
2515	pub unsafe fn _mm256_castpd_ps(a: __m256d) -> __m256 {
2516	transmute(src:a)
2517	}
2518
2519	/// Cast vector of type __m256 to type __m256d.
2520	///
2521	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps_pd)
2522	#[inline]
2523	#[target_feature(enable = "avx")]
2524	// This intrinsic is only used for compilation and does not generate any
2525	// instructions, thus it has zero latency.
2526	#[stable(feature = "simd_x86", since = "1.27.0")]
2527	pub unsafe fn _mm256_castps_pd(a: __m256) -> __m256d {
2528	transmute(src:a)
2529	}
2530
2531	/// Casts vector of type __m256 to type __m256i.
2532	///
2533	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps_si256)
2534	#[inline]
2535	#[target_feature(enable = "avx")]
2536	// This intrinsic is only used for compilation and does not generate any
2537	// instructions, thus it has zero latency.
2538	#[stable(feature = "simd_x86", since = "1.27.0")]
2539	pub unsafe fn _mm256_castps_si256(a: __m256) -> __m256i {
2540	transmute(src:a)
2541	}
2542
2543	/// Casts vector of type __m256i to type __m256.
2544	///
2545	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_ps)
2546	#[inline]
2547	#[target_feature(enable = "avx")]
2548	// This intrinsic is only used for compilation and does not generate any
2549	// instructions, thus it has zero latency.
2550	#[stable(feature = "simd_x86", since = "1.27.0")]
2551	pub unsafe fn _mm256_castsi256_ps(a: __m256i) -> __m256 {
2552	transmute(src:a)
2553	}
2554
2555	/// Casts vector of type __m256d to type __m256i.
2556	///
2557	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd_si256)
2558	#[inline]
2559	#[target_feature(enable = "avx")]
2560	// This intrinsic is only used for compilation and does not generate any
2561	// instructions, thus it has zero latency.
2562	#[stable(feature = "simd_x86", since = "1.27.0")]
2563	pub unsafe fn _mm256_castpd_si256(a: __m256d) -> __m256i {
2564	transmute(src:a)
2565	}
2566
2567	/// Casts vector of type __m256i to type __m256d.
2568	///
2569	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_pd)
2570	#[inline]
2571	#[target_feature(enable = "avx")]
2572	// This intrinsic is only used for compilation and does not generate any
2573	// instructions, thus it has zero latency.
2574	#[stable(feature = "simd_x86", since = "1.27.0")]
2575	pub unsafe fn _mm256_castsi256_pd(a: __m256i) -> __m256d {
2576	transmute(src:a)
2577	}
2578
2579	/// Casts vector of type __m256 to type __m128.
2580	///
2581	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps256_ps128)
2582	#[inline]
2583	#[target_feature(enable = "avx")]
2584	// This intrinsic is only used for compilation and does not generate any
2585	// instructions, thus it has zero latency.
2586	#[stable(feature = "simd_x86", since = "1.27.0")]
2587	pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 {
2588	simd_shuffle!(a, a, [`0`, `1`, `2`, `3`])
2589	}
2590
2591	/// Casts vector of type __m256d to type __m128d.
2592	///
2593	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd256_pd128)
2594	#[inline]
2595	#[target_feature(enable = "avx")]
2596	// This intrinsic is only used for compilation and does not generate any
2597	// instructions, thus it has zero latency.
2598	#[stable(feature = "simd_x86", since = "1.27.0")]
2599	pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
2600	simd_shuffle!(a, a, [`0`, `1`])
2601	}
2602
2603	/// Casts vector of type __m256i to type __m128i.
2604	///
2605	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_si128)
2606	#[inline]
2607	#[target_feature(enable = "avx")]
2608	// This intrinsic is only used for compilation and does not generate any
2609	// instructions, thus it has zero latency.
2610	#[stable(feature = "simd_x86", since = "1.27.0")]
2611	pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
2612	let a: i64x4 = a.as_i64x4();
2613	let dst: i64x2 = simd_shuffle!(a, a, [`0`, `1`]);
2614	transmute(src:dst)
2615	}
2616
2617	/// Casts vector of type __m128 to type __m256;
2618	/// the upper 128 bits of the result are undefined.
2619	///
2620	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps128_ps256)
2621	#[inline]
2622	#[target_feature(enable = "avx")]
2623	// This intrinsic is only used for compilation and does not generate any
2624	// instructions, thus it has zero latency.
2625	#[stable(feature = "simd_x86", since = "1.27.0")]
2626	pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
2627	// FIXME simd_shuffle!(a, a, [0, 1, 2, 3, -1, -1, -1, -1])
2628	simd_shuffle!(a, a, [`0`, `1`, `2`, `3`, `0`, `0`, `0`, `0`])
2629	}
2630
2631	/// Casts vector of type __m128d to type __m256d;
2632	/// the upper 128 bits of the result are undefined.
2633	///
2634	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd128_pd256)
2635	#[inline]
2636	#[target_feature(enable = "avx")]
2637	// This intrinsic is only used for compilation and does not generate any
2638	// instructions, thus it has zero latency.
2639	#[stable(feature = "simd_x86", since = "1.27.0")]
2640	pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
2641	// FIXME simd_shuffle!(a, a, [0, 1, -1, -1])
2642	simd_shuffle!(a, a, [`0`, `1`, `0`, `0`])
2643	}
2644
2645	/// Casts vector of type __m128i to type __m256i;
2646	/// the upper 128 bits of the result are undefined.
2647	///
2648	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi128_si256)
2649	#[inline]
2650	#[target_feature(enable = "avx")]
2651	// This intrinsic is only used for compilation and does not generate any
2652	// instructions, thus it has zero latency.
2653	#[stable(feature = "simd_x86", since = "1.27.0")]
2654	pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
2655	let a: i64x2 = a.as_i64x2();
2656	// FIXME simd_shuffle!(a, a, [0, 1, -1, -1])
2657	let dst: i64x4 = simd_shuffle!(a, a, [`0`, `1`, `0`, `0`]);
2658	transmute(src:dst)
2659	}
2660
2661	/// Constructs a 256-bit floating-point vector of `[8 x float]` from a
2662	/// 128-bit floating-point vector of `[4 x float]`. The lower 128 bits contain
2663	/// the value of the source vector. The upper 128 bits are set to zero.
2664	///
2665	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextps128_ps256)
2666	#[inline]
2667	#[target_feature(enable = "avx,sse")]
2668	// This intrinsic is only used for compilation and does not generate any
2669	// instructions, thus it has zero latency.
2670	#[stable(feature = "simd_x86", since = "1.27.0")]
2671	pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
2672	simd_shuffle!(a, _mm_setzero_ps(), [`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`])
2673	}
2674
2675	/// Constructs a 256-bit integer vector from a 128-bit integer vector.
2676	/// The lower 128 bits contain the value of the source vector. The upper
2677	/// 128 bits are set to zero.
2678	///
2679	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextsi128_si256)
2680	#[inline]
2681	#[target_feature(enable = "avx,sse2")]
2682	// This intrinsic is only used for compilation and does not generate any
2683	// instructions, thus it has zero latency.
2684	#[stable(feature = "simd_x86", since = "1.27.0")]
2685	pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
2686	let b: i64x2 = _mm_setzero_si128().as_i64x2();
2687	let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [`0`, `1`, `2`, `3`]);
2688	transmute(src:dst)
2689	}
2690
2691	/// Constructs a 256-bit floating-point vector of `[4 x double]` from a
2692	/// 128-bit floating-point vector of `[2 x double]`. The lower 128 bits
2693	/// contain the value of the source vector. The upper 128 bits are set
2694	/// to zero.
2695	///
2696	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextpd128_pd256)
2697	#[inline]
2698	#[target_feature(enable = "avx,sse2")]
2699	// This intrinsic is only used for compilation and does not generate any
2700	// instructions, thus it has zero latency.
2701	#[stable(feature = "simd_x86", since = "1.27.0")]
2702	pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
2703	simd_shuffle!(a, _mm_setzero_pd(), [`0`, `1`, `2`, `3`])
2704	}
2705
2706	/// Returns vector of type `__m256` with indeterminate elements.
2707	/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
2708	/// In practice, this is equivalent to [`mem::zeroed`].
2709	///
2710	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_ps)
2711	#[inline]
2712	#[target_feature(enable = "avx")]
2713	// This intrinsic has no corresponding instruction.
2714	#[stable(feature = "simd_x86", since = "1.27.0")]
2715	pub unsafe fn _mm256_undefined_ps() -> __m256 {
2716	_mm256_set1_ps(`0.0`)
2717	}
2718
2719	/// Returns vector of type `__m256d` with indeterminate elements.
2720	/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
2721	/// In practice, this is equivalent to [`mem::zeroed`].
2722	///
2723	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_pd)
2724	#[inline]
2725	#[target_feature(enable = "avx")]
2726	// This intrinsic has no corresponding instruction.
2727	#[stable(feature = "simd_x86", since = "1.27.0")]
2728	pub unsafe fn _mm256_undefined_pd() -> __m256d {
2729	_mm256_set1_pd(`0.0`)
2730	}
2731
2732	/// Returns vector of type __m256i with with indeterminate elements.
2733	/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
2734	/// In practice, this is equivalent to [`mem::zeroed`].
2735	///
2736	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_si256)
2737	#[inline]
2738	#[target_feature(enable = "avx")]
2739	// This intrinsic has no corresponding instruction.
2740	#[stable(feature = "simd_x86", since = "1.27.0")]
2741	pub unsafe fn _mm256_undefined_si256() -> __m256i {
2742	__m256i(`0`, `0`, `0`, `0`)
2743	}
2744
2745	/// Sets packed __m256 returned vector with the supplied values.
2746	///
2747	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128)
2748	#[inline]
2749	#[target_feature(enable = "avx")]
2750	#[cfg_attr(test, assert_instr(vinsertf128))]
2751	#[stable(feature = "simd_x86", since = "1.27.0")]
2752	pub unsafe fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 {
2753	simd_shuffle!(lo, hi, [`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`])
2754	}
2755
2756	/// Sets packed __m256d returned vector with the supplied values.
2757	///
2758	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128d)
2759	#[inline]
2760	#[target_feature(enable = "avx")]
2761	#[cfg_attr(test, assert_instr(vinsertf128))]
2762	#[stable(feature = "simd_x86", since = "1.27.0")]
2763	pub unsafe fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d {
2764	let hi: __m128 = transmute(src:hi);
2765	let lo: __m128 = transmute(src:lo);
2766	transmute(src:_mm256_set_m128(hi, lo))
2767	}
2768
2769	/// Sets packed __m256i returned vector with the supplied values.
2770	///
2771	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128i)
2772	#[inline]
2773	#[target_feature(enable = "avx")]
2774	#[cfg_attr(test, assert_instr(vinsertf128))]
2775	#[stable(feature = "simd_x86", since = "1.27.0")]
2776	pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
2777	let hi: __m128 = transmute(src:hi);
2778	let lo: __m128 = transmute(src:lo);
2779	transmute(src:_mm256_set_m128(hi, lo))
2780	}
2781
2782	/// Sets packed __m256 returned vector with the supplied values.
2783	///
2784	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128)
2785	#[inline]
2786	#[target_feature(enable = "avx")]
2787	#[cfg_attr(test, assert_instr(vinsertf128))]
2788	#[stable(feature = "simd_x86", since = "1.27.0")]
2789	pub unsafe fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 {
2790	_mm256_set_m128(hi, lo)
2791	}
2792
2793	/// Sets packed __m256d returned vector with the supplied values.
2794	///
2795	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128d)
2796	#[inline]
2797	#[target_feature(enable = "avx")]
2798	#[cfg_attr(test, assert_instr(vinsertf128))]
2799	#[stable(feature = "simd_x86", since = "1.27.0")]
2800	pub unsafe fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d {
2801	_mm256_set_m128d(hi, lo)
2802	}
2803
2804	/// Sets packed __m256i returned vector with the supplied values.
2805	///
2806	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128i)
2807	#[inline]
2808	#[target_feature(enable = "avx")]
2809	#[cfg_attr(test, assert_instr(vinsertf128))]
2810	#[stable(feature = "simd_x86", since = "1.27.0")]
2811	pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
2812	_mm256_set_m128i(hi, lo)
2813	}
2814
2815	/// Loads two 128-bit values (composed of 4 packed single-precision (32-bit)
2816	/// floating-point elements) from memory, and combine them into a 256-bit
2817	/// value.
2818	/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2819	///
2820	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128)
2821	#[inline]
2822	#[target_feature(enable = "avx,sse")]
2823	// This intrinsic has no corresponding instruction.
2824	#[stable(feature = "simd_x86", since = "1.27.0")]
2825	pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m256 {
2826	let a: __m256 = _mm256_castps128_ps256(_mm_loadu_ps(loaddr));
2827	_mm256_insertf128_ps::<`1`>(a, b:_mm_loadu_ps(hiaddr))
2828	}
2829
2830	/// Loads two 128-bit values (composed of 2 packed double-precision (64-bit)
2831	/// floating-point elements) from memory, and combine them into a 256-bit
2832	/// value.
2833	/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2834	///
2835	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128d)
2836	#[inline]
2837	#[target_feature(enable = "avx,sse2")]
2838	// This intrinsic has no corresponding instruction.
2839	#[stable(feature = "simd_x86", since = "1.27.0")]
2840	pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m256d {
2841	let a: __m256d = _mm256_castpd128_pd256(_mm_loadu_pd(mem_addr:loaddr));
2842	_mm256_insertf128_pd::<`1`>(a, b:_mm_loadu_pd(mem_addr:hiaddr))
2843	}
2844
2845	/// Loads two 128-bit values (composed of integer data) from memory, and combine
2846	/// them into a 256-bit value.
2847	/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2848	///
2849	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128i)
2850	#[inline]
2851	#[target_feature(enable = "avx,sse2")]
2852	// This intrinsic has no corresponding instruction.
2853	#[stable(feature = "simd_x86", since = "1.27.0")]
2854	pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i {
2855	let a: __m256i = _mm256_castsi128_si256(_mm_loadu_si128(mem_addr:loaddr));
2856	_mm256_insertf128_si256::<`1`>(a, b:_mm_loadu_si128(mem_addr:hiaddr))
2857	}
2858
2859	/// Stores the high and low 128-bit halves (each composed of 4 packed
2860	/// single-precision (32-bit) floating-point elements) from `a` into memory two
2861	/// different 128-bit locations.
2862	/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2863	///
2864	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128)
2865	#[inline]
2866	#[target_feature(enable = "avx,sse")]
2867	// This intrinsic has no corresponding instruction.
2868	#[stable(feature = "simd_x86", since = "1.27.0")]
2869	pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256) {
2870	let lo: __m128 = _mm256_castps256_ps128(a);
2871	_mm_storeu_ps(p:loaddr, a:lo);
2872	let hi: __m128 = _mm256_extractf128_ps::<`1`>(a);
2873	_mm_storeu_ps(p:hiaddr, a:hi);
2874	}
2875
2876	/// Stores the high and low 128-bit halves (each composed of 2 packed
2877	/// double-precision (64-bit) floating-point elements) from `a` into memory two
2878	/// different 128-bit locations.
2879	/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2880	///
2881	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128d)
2882	#[inline]
2883	#[target_feature(enable = "avx,sse2")]
2884	// This intrinsic has no corresponding instruction.
2885	#[stable(feature = "simd_x86", since = "1.27.0")]
2886	pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256d) {
2887	let lo: __m128d = _mm256_castpd256_pd128(a);
2888	_mm_storeu_pd(mem_addr:loaddr, a:lo);
2889	let hi: __m128d = _mm256_extractf128_pd::<`1`>(a);
2890	_mm_storeu_pd(mem_addr:hiaddr, a:hi);
2891	}
2892
2893	/// Stores the high and low 128-bit halves (each composed of integer data) from
2894	/// `a` into memory two different 128-bit locations.
2895	/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2896	///
2897	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128i)
2898	#[inline]
2899	#[target_feature(enable = "avx,sse2")]
2900	// This intrinsic has no corresponding instruction.
2901	#[stable(feature = "simd_x86", since = "1.27.0")]
2902	pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i) {
2903	let lo: __m128i = _mm256_castsi256_si128(a);
2904	_mm_storeu_si128(mem_addr:loaddr, a:lo);
2905	let hi: __m128i = _mm256_extractf128_si256::<`1`>(a);
2906	_mm_storeu_si128(mem_addr:hiaddr, a:hi);
2907	}
2908
2909	/// Returns the first element of the input vector of `[8 x float]`.
2910	///
2911	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtss_f32)
2912	#[inline]
2913	#[target_feature(enable = "avx")]
2914	//#[cfg_attr(test, assert_instr(movss))] FIXME
2915	#[stable(feature = "simd_x86", since = "1.27.0")]
2916	pub unsafe fn _mm256_cvtss_f32(a: __m256) -> f32 {
2917	simd_extract(x:a, idx:`0`)
2918	}
2919
2920	// LLVM intrinsics used in the above functions
2921	#[allow(improper_ctypes)]
2922	extern "C" {
2923	#[link_name = "llvm.x86.avx.round.pd.256"]
2924	fn roundpd256(a: __m256d, b: i32) -> __m256d;
2925	#[link_name = "llvm.x86.avx.round.ps.256"]
2926	fn roundps256(a: __m256, b: i32) -> __m256;
2927	#[link_name = "llvm.x86.avx.sqrt.ps.256"]
2928	fn sqrtps256(a: __m256) -> __m256;
2929	#[link_name = "llvm.x86.avx.dp.ps.256"]
2930	fn vdpps(a: __m256, b: __m256, imm8: i32) -> __m256;
2931	#[link_name = "llvm.x86.avx.hadd.pd.256"]
2932	fn vhaddpd(a: __m256d, b: __m256d) -> __m256d;
2933	#[link_name = "llvm.x86.avx.hadd.ps.256"]
2934	fn vhaddps(a: __m256, b: __m256) -> __m256;
2935	#[link_name = "llvm.x86.avx.hsub.pd.256"]
2936	fn vhsubpd(a: __m256d, b: __m256d) -> __m256d;
2937	#[link_name = "llvm.x86.avx.hsub.ps.256"]
2938	fn vhsubps(a: __m256, b: __m256) -> __m256;
2939	#[link_name = "llvm.x86.sse2.cmp.pd"]
2940	fn vcmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
2941	#[link_name = "llvm.x86.avx.cmp.pd.256"]
2942	fn vcmppd256(a: __m256d, b: __m256d, imm8: u8) -> __m256d;
2943	#[link_name = "llvm.x86.sse.cmp.ps"]
2944	fn vcmpps(a: __m128, b: __m128, imm8: i8) -> __m128;
2945	#[link_name = "llvm.x86.avx.cmp.ps.256"]
2946	fn vcmpps256(a: __m256, b: __m256, imm8: u8) -> __m256;
2947	#[link_name = "llvm.x86.sse2.cmp.sd"]
2948	fn vcmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
2949	#[link_name = "llvm.x86.sse.cmp.ss"]
2950	fn vcmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
2951	#[link_name = "llvm.x86.avx.cvtdq2.ps.256"]
2952	fn vcvtdq2ps(a: i32x8) -> __m256;
2953	#[link_name = "llvm.x86.avx.cvt.pd2.ps.256"]
2954	fn vcvtpd2ps(a: __m256d) -> __m128;
2955	#[link_name = "llvm.x86.avx.cvt.ps2dq.256"]
2956	fn vcvtps2dq(a: __m256) -> i32x8;
2957	#[link_name = "llvm.x86.avx.cvtt.pd2dq.256"]
2958	fn vcvttpd2dq(a: __m256d) -> i32x4;
2959	#[link_name = "llvm.x86.avx.cvt.pd2dq.256"]
2960	fn vcvtpd2dq(a: __m256d) -> i32x4;
2961	#[link_name = "llvm.x86.avx.cvtt.ps2dq.256"]
2962	fn vcvttps2dq(a: __m256) -> i32x8;
2963	#[link_name = "llvm.x86.avx.vzeroall"]
2964	fn vzeroall();
2965	#[link_name = "llvm.x86.avx.vzeroupper"]
2966	fn vzeroupper();
2967	#[link_name = "llvm.x86.avx.vpermilvar.ps.256"]
2968	fn vpermilps256(a: __m256, b: i32x8) -> __m256;
2969	#[link_name = "llvm.x86.avx.vpermilvar.ps"]
2970	fn vpermilps(a: __m128, b: i32x4) -> __m128;
2971	#[link_name = "llvm.x86.avx.vpermilvar.pd.256"]
2972	fn vpermilpd256(a: __m256d, b: i64x4) -> __m256d;
2973	#[link_name = "llvm.x86.avx.vpermilvar.pd"]
2974	fn vpermilpd(a: __m128d, b: i64x2) -> __m128d;
2975	#[link_name = "llvm.x86.avx.vperm2f128.ps.256"]
2976	fn vperm2f128ps256(a: __m256, b: __m256, imm8: i8) -> __m256;
2977	#[link_name = "llvm.x86.avx.vperm2f128.pd.256"]
2978	fn vperm2f128pd256(a: __m256d, b: __m256d, imm8: i8) -> __m256d;
2979	#[link_name = "llvm.x86.avx.vperm2f128.si.256"]
2980	fn vperm2f128si256(a: i32x8, b: i32x8, imm8: i8) -> i32x8;
2981	#[link_name = "llvm.x86.avx.vbroadcastf128.ps.256"]
2982	fn vbroadcastf128ps256(a: &__m128) -> __m256;
2983	#[link_name = "llvm.x86.avx.vbroadcastf128.pd.256"]
2984	fn vbroadcastf128pd256(a: &__m128d) -> __m256d;
2985	#[link_name = "llvm.x86.avx.maskload.pd.256"]
2986	fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d;
2987	#[link_name = "llvm.x86.avx.maskstore.pd.256"]
2988	fn maskstorepd256(mem_addr: *mut i8, mask: i64x4, a: __m256d);
2989	#[link_name = "llvm.x86.avx.maskload.pd"]
2990	fn maskloadpd(mem_addr: *const i8, mask: i64x2) -> __m128d;
2991	#[link_name = "llvm.x86.avx.maskstore.pd"]
2992	fn maskstorepd(mem_addr: *mut i8, mask: i64x2, a: __m128d);
2993	#[link_name = "llvm.x86.avx.maskload.ps.256"]
2994	fn maskloadps256(mem_addr: *const i8, mask: i32x8) -> __m256;
2995	#[link_name = "llvm.x86.avx.maskstore.ps.256"]
2996	fn maskstoreps256(mem_addr: *mut i8, mask: i32x8, a: __m256);
2997	#[link_name = "llvm.x86.avx.maskload.ps"]
2998	fn maskloadps(mem_addr: *const i8, mask: i32x4) -> __m128;
2999	#[link_name = "llvm.x86.avx.maskstore.ps"]
3000	fn maskstoreps(mem_addr: *mut i8, mask: i32x4, a: __m128);
3001	#[link_name = "llvm.x86.avx.ldu.dq.256"]
3002	fn vlddqu(mem_addr: *const i8) -> i8x32;
3003	#[link_name = "llvm.x86.avx.rcp.ps.256"]
3004	fn vrcpps(a: __m256) -> __m256;
3005	#[link_name = "llvm.x86.avx.rsqrt.ps.256"]
3006	fn vrsqrtps(a: __m256) -> __m256;
3007	#[link_name = "llvm.x86.avx.ptestz.256"]
3008	fn ptestz256(a: i64x4, b: i64x4) -> i32;
3009	#[link_name = "llvm.x86.avx.ptestc.256"]
3010	fn ptestc256(a: i64x4, b: i64x4) -> i32;
3011	#[link_name = "llvm.x86.avx.ptestnzc.256"]
3012	fn ptestnzc256(a: i64x4, b: i64x4) -> i32;
3013	#[link_name = "llvm.x86.avx.vtestz.pd.256"]
3014	fn vtestzpd256(a: __m256d, b: __m256d) -> i32;
3015	#[link_name = "llvm.x86.avx.vtestc.pd.256"]
3016	fn vtestcpd256(a: __m256d, b: __m256d) -> i32;
3017	#[link_name = "llvm.x86.avx.vtestnzc.pd.256"]
3018	fn vtestnzcpd256(a: __m256d, b: __m256d) -> i32;
3019	#[link_name = "llvm.x86.avx.vtestz.pd"]
3020	fn vtestzpd(a: __m128d, b: __m128d) -> i32;
3021	#[link_name = "llvm.x86.avx.vtestc.pd"]
3022	fn vtestcpd(a: __m128d, b: __m128d) -> i32;
3023	#[link_name = "llvm.x86.avx.vtestnzc.pd"]
3024	fn vtestnzcpd(a: __m128d, b: __m128d) -> i32;
3025	#[link_name = "llvm.x86.avx.vtestz.ps.256"]
3026	fn vtestzps256(a: __m256, b: __m256) -> i32;
3027	#[link_name = "llvm.x86.avx.vtestc.ps.256"]
3028	fn vtestcps256(a: __m256, b: __m256) -> i32;
3029	#[link_name = "llvm.x86.avx.vtestnzc.ps.256"]
3030	fn vtestnzcps256(a: __m256, b: __m256) -> i32;
3031	#[link_name = "llvm.x86.avx.vtestz.ps"]
3032	fn vtestzps(a: __m128, b: __m128) -> i32;
3033	#[link_name = "llvm.x86.avx.vtestc.ps"]
3034	fn vtestcps(a: __m128, b: __m128) -> i32;
3035	#[link_name = "llvm.x86.avx.vtestnzc.ps"]
3036	fn vtestnzcps(a: __m128, b: __m128) -> i32;
3037	#[link_name = "llvm.x86.avx.min.ps.256"]
3038	fn vminps(a: __m256, b: __m256) -> __m256;
3039	#[link_name = "llvm.x86.avx.max.ps.256"]
3040	fn vmaxps(a: __m256, b: __m256) -> __m256;
3041	#[link_name = "llvm.x86.avx.min.pd.256"]
3042	fn vminpd(a: __m256d, b: __m256d) -> __m256d;
3043	#[link_name = "llvm.x86.avx.max.pd.256"]
3044	fn vmaxpd(a: __m256d, b: __m256d) -> __m256d;
3045	}
3046
3047	#[cfg(test)]
3048	mod tests {
3049	use crate::hint::black_box;
3050	use stdarch_test::simd_test;
3051
3052	use crate::core_arch::x86::*;
3053
3054	#[simd_test(enable = "avx")]
3055	unsafe fn test_mm256_add_pd() {
3056	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3057	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
3058	let r = _mm256_add_pd(a, b);
3059	let e = _mm256_setr_pd(`6.`, `8.`, `10.`, `12.`);
3060	assert_eq_m256d(r, e);
3061	}
3062
3063	#[simd_test(enable = "avx")]
3064	unsafe fn test_mm256_add_ps() {
3065	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
3066	let b = _mm256_setr_ps(`9.`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
3067	let r = _mm256_add_ps(a, b);
3068	let e = _mm256_setr_ps(`10.`, `12.`, `14.`, `16.`, `18.`, `20.`, `22.`, `24.`);
3069	assert_eq_m256(r, e);
3070	}
3071
3072	#[simd_test(enable = "avx")]
3073	unsafe fn test_mm256_and_pd() {
3074	let a = _mm256_set1_pd(`1.`);
3075	let b = _mm256_set1_pd(`0.6`);
3076	let r = _mm256_and_pd(a, b);
3077	let e = _mm256_set1_pd(`0.5`);
3078	assert_eq_m256d(r, e);
3079	}
3080
3081	#[simd_test(enable = "avx")]
3082	unsafe fn test_mm256_and_ps() {
3083	let a = _mm256_set1_ps(`1.`);
3084	let b = _mm256_set1_ps(`0.6`);
3085	let r = _mm256_and_ps(a, b);
3086	let e = _mm256_set1_ps(`0.5`);
3087	assert_eq_m256(r, e);
3088	}
3089
3090	#[simd_test(enable = "avx")]
3091	unsafe fn test_mm256_or_pd() {
3092	let a = _mm256_set1_pd(`1.`);
3093	let b = _mm256_set1_pd(`0.6`);
3094	let r = _mm256_or_pd(a, b);
3095	let e = _mm256_set1_pd(`1.2`);
3096	assert_eq_m256d(r, e);
3097	}
3098
3099	#[simd_test(enable = "avx")]
3100	unsafe fn test_mm256_or_ps() {
3101	let a = _mm256_set1_ps(`1.`);
3102	let b = _mm256_set1_ps(`0.6`);
3103	let r = _mm256_or_ps(a, b);
3104	let e = _mm256_set1_ps(`1.2`);
3105	assert_eq_m256(r, e);
3106	}
3107
3108	#[simd_test(enable = "avx")]
3109	unsafe fn test_mm256_shuffle_pd() {
3110	let a = _mm256_setr_pd(`1.`, `4.`, `5.`, `8.`);
3111	let b = _mm256_setr_pd(`2.`, `3.`, `6.`, `7.`);
3112	let r = _mm256_shuffle_pd::<`0b11_11_11_11`>(a, b);
3113	let e = _mm256_setr_pd(`4.`, `3.`, `8.`, `7.`);
3114	assert_eq_m256d(r, e);
3115	}
3116
3117	#[simd_test(enable = "avx")]
3118	unsafe fn test_mm256_shuffle_ps() {
3119	let a = _mm256_setr_ps(`1.`, `4.`, `5.`, `8.`, `9.`, `12.`, `13.`, `16.`);
3120	let b = _mm256_setr_ps(`2.`, `3.`, `6.`, `7.`, `10.`, `11.`, `14.`, `15.`);
3121	let r = _mm256_shuffle_ps::<`0b00_00_11_11`>(a, b);
3122	let e = _mm256_setr_ps(`8.`, `8.`, `2.`, `2.`, `16.`, `16.`, `10.`, `10.`);
3123	assert_eq_m256(r, e);
3124	}
3125
3126	#[simd_test(enable = "avx")]
3127	unsafe fn test_mm256_andnot_pd() {
3128	let a = _mm256_set1_pd(`0.`);
3129	let b = _mm256_set1_pd(`0.6`);
3130	let r = _mm256_andnot_pd(a, b);
3131	assert_eq_m256d(r, b);
3132	}
3133
3134	#[simd_test(enable = "avx")]
3135	unsafe fn test_mm256_andnot_ps() {
3136	let a = _mm256_set1_ps(`0.`);
3137	let b = _mm256_set1_ps(`0.6`);
3138	let r = _mm256_andnot_ps(a, b);
3139	assert_eq_m256(r, b);
3140	}
3141
3142	#[simd_test(enable = "avx")]
3143	unsafe fn test_mm256_max_pd() {
3144	let a = _mm256_setr_pd(`1.`, `4.`, `5.`, `8.`);
3145	let b = _mm256_setr_pd(`2.`, `3.`, `6.`, `7.`);
3146	let r = _mm256_max_pd(a, b);
3147	let e = _mm256_setr_pd(`2.`, `4.`, `6.`, `8.`);
3148	assert_eq_m256d(r, e);
3149	// > If the values being compared are both 0.0s (of either sign), the
3150	// > value in the second operand (source operand) is returned.
3151	let w = _mm256_max_pd(_mm256_set1_pd(`0.0`), _mm256_set1_pd(`-0.0`));
3152	let x = _mm256_max_pd(_mm256_set1_pd(`-0.0`), _mm256_set1_pd(`0.0`));
3153	let wu: [u64; `4`] = transmute(w);
3154	let xu: [u64; `4`] = transmute(x);
3155	assert_eq!(wu, [`0x8000_0000_0000_0000u64`; `4`]);
3156	assert_eq!(xu, [`0u64`; `4`]);
3157	// > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3158	// > second operand (source operand), either a NaN or a valid
3159	// > floating-point value, is written to the result.
3160	let y = _mm256_max_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(`0.0`));
3161	let z = _mm256_max_pd(_mm256_set1_pd(`0.0`), _mm256_set1_pd(f64::NAN));
3162	let yf: [f64; `4`] = transmute(y);
3163	let zf: [f64; `4`] = transmute(z);
3164	assert_eq!(yf, [`0.0`; `4`]);
3165	assert!(zf.iter().all(\|f\| f.is_nan()), "{:?}", zf);
3166	}
3167
3168	#[simd_test(enable = "avx")]
3169	unsafe fn test_mm256_max_ps() {
3170	let a = _mm256_setr_ps(`1.`, `4.`, `5.`, `8.`, `9.`, `12.`, `13.`, `16.`);
3171	let b = _mm256_setr_ps(`2.`, `3.`, `6.`, `7.`, `10.`, `11.`, `14.`, `15.`);
3172	let r = _mm256_max_ps(a, b);
3173	let e = _mm256_setr_ps(`2.`, `4.`, `6.`, `8.`, `10.`, `12.`, `14.`, `16.`);
3174	assert_eq_m256(r, e);
3175	// > If the values being compared are both 0.0s (of either sign), the
3176	// > value in the second operand (source operand) is returned.
3177	let w = _mm256_max_ps(_mm256_set1_ps(`0.0`), _mm256_set1_ps(`-0.0`));
3178	let x = _mm256_max_ps(_mm256_set1_ps(`-0.0`), _mm256_set1_ps(`0.0`));
3179	let wu: [u32; `8`] = transmute(w);
3180	let xu: [u32; `8`] = transmute(x);
3181	assert_eq!(wu, [`0x8000_0000u32`; `8`]);
3182	assert_eq!(xu, [`0u32`; `8`]);
3183	// > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3184	// > second operand (source operand), either a NaN or a valid
3185	// > floating-point value, is written to the result.
3186	let y = _mm256_max_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(`0.0`));
3187	let z = _mm256_max_ps(_mm256_set1_ps(`0.0`), _mm256_set1_ps(f32::NAN));
3188	let yf: [f32; `8`] = transmute(y);
3189	let zf: [f32; `8`] = transmute(z);
3190	assert_eq!(yf, [`0.0`; `8`]);
3191	assert!(zf.iter().all(\|f\| f.is_nan()), "{:?}", zf);
3192	}
3193
3194	#[simd_test(enable = "avx")]
3195	unsafe fn test_mm256_min_pd() {
3196	let a = _mm256_setr_pd(`1.`, `4.`, `5.`, `8.`);
3197	let b = _mm256_setr_pd(`2.`, `3.`, `6.`, `7.`);
3198	let r = _mm256_min_pd(a, b);
3199	let e = _mm256_setr_pd(`1.`, `3.`, `5.`, `7.`);
3200	assert_eq_m256d(r, e);
3201	// > If the values being compared are both 0.0s (of either sign), the
3202	// > value in the second operand (source operand) is returned.
3203	let w = _mm256_min_pd(_mm256_set1_pd(`0.0`), _mm256_set1_pd(`-0.0`));
3204	let x = _mm256_min_pd(_mm256_set1_pd(`-0.0`), _mm256_set1_pd(`0.0`));
3205	let wu: [u64; `4`] = transmute(w);
3206	let xu: [u64; `4`] = transmute(x);
3207	assert_eq!(wu, [`0x8000_0000_0000_0000u64`; `4`]);
3208	assert_eq!(xu, [`0u64`; `4`]);
3209	// > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3210	// > second operand (source operand), either a NaN or a valid
3211	// > floating-point value, is written to the result.
3212	let y = _mm256_min_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(`0.0`));
3213	let z = _mm256_min_pd(_mm256_set1_pd(`0.0`), _mm256_set1_pd(f64::NAN));
3214	let yf: [f64; `4`] = transmute(y);
3215	let zf: [f64; `4`] = transmute(z);
3216	assert_eq!(yf, [`0.0`; `4`]);
3217	assert!(zf.iter().all(\|f\| f.is_nan()), "{:?}", zf);
3218	}
3219
3220	#[simd_test(enable = "avx")]
3221	unsafe fn test_mm256_min_ps() {
3222	let a = _mm256_setr_ps(`1.`, `4.`, `5.`, `8.`, `9.`, `12.`, `13.`, `16.`);
3223	let b = _mm256_setr_ps(`2.`, `3.`, `6.`, `7.`, `10.`, `11.`, `14.`, `15.`);
3224	let r = _mm256_min_ps(a, b);
3225	let e = _mm256_setr_ps(`1.`, `3.`, `5.`, `7.`, `9.`, `11.`, `13.`, `15.`);
3226	assert_eq_m256(r, e);
3227	// > If the values being compared are both 0.0s (of either sign), the
3228	// > value in the second operand (source operand) is returned.
3229	let w = _mm256_min_ps(_mm256_set1_ps(`0.0`), _mm256_set1_ps(`-0.0`));
3230	let x = _mm256_min_ps(_mm256_set1_ps(`-0.0`), _mm256_set1_ps(`0.0`));
3231	let wu: [u32; `8`] = transmute(w);
3232	let xu: [u32; `8`] = transmute(x);
3233	assert_eq!(wu, [`0x8000_0000u32`; `8`]);
3234	assert_eq!(xu, [`0u32`; `8`]);
3235	// > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3236	// > second operand (source operand), either a NaN or a valid
3237	// > floating-point value, is written to the result.
3238	let y = _mm256_min_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(`0.0`));
3239	let z = _mm256_min_ps(_mm256_set1_ps(`0.0`), _mm256_set1_ps(f32::NAN));
3240	let yf: [f32; `8`] = transmute(y);
3241	let zf: [f32; `8`] = transmute(z);
3242	assert_eq!(yf, [`0.0`; `8`]);
3243	assert!(zf.iter().all(\|f\| f.is_nan()), "{:?}", zf);
3244	}
3245
3246	#[simd_test(enable = "avx")]
3247	unsafe fn test_mm256_mul_pd() {
3248	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3249	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
3250	let r = _mm256_mul_pd(a, b);
3251	let e = _mm256_setr_pd(`5.`, `12.`, `21.`, `32.`);
3252	assert_eq_m256d(r, e);
3253	}
3254
3255	#[simd_test(enable = "avx")]
3256	unsafe fn test_mm256_mul_ps() {
3257	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
3258	let b = _mm256_setr_ps(`9.`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
3259	let r = _mm256_mul_ps(a, b);
3260	let e = _mm256_setr_ps(`9.`, `20.`, `33.`, `48.`, `65.`, `84.`, `105.`, `128.`);
3261	assert_eq_m256(r, e);
3262	}
3263
3264	#[simd_test(enable = "avx")]
3265	unsafe fn test_mm256_addsub_pd() {
3266	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3267	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
3268	let r = _mm256_addsub_pd(a, b);
3269	let e = _mm256_setr_pd(`-4.`, `8.`, `-4.`, `12.`);
3270	assert_eq_m256d(r, e);
3271	}
3272
3273	#[simd_test(enable = "avx")]
3274	unsafe fn test_mm256_addsub_ps() {
3275	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `1.`, `2.`, `3.`, `4.`);
3276	let b = _mm256_setr_ps(`5.`, `6.`, `7.`, `8.`, `5.`, `6.`, `7.`, `8.`);
3277	let r = _mm256_addsub_ps(a, b);
3278	let e = _mm256_setr_ps(`-4.`, `8.`, `-4.`, `12.`, `-4.`, `8.`, `-4.`, `12.`);
3279	assert_eq_m256(r, e);
3280	}
3281
3282	#[simd_test(enable = "avx")]
3283	unsafe fn test_mm256_sub_pd() {
3284	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3285	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
3286	let r = _mm256_sub_pd(a, b);
3287	let e = _mm256_setr_pd(`-4.`, `-4.`, `-4.`, `-4.`);
3288	assert_eq_m256d(r, e);
3289	}
3290
3291	#[simd_test(enable = "avx")]
3292	unsafe fn test_mm256_sub_ps() {
3293	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `-1.`, `-2.`, `-3.`, `-4.`);
3294	let b = _mm256_setr_ps(`5.`, `6.`, `7.`, `8.`, `3.`, `2.`, `1.`, `0.`);
3295	let r = _mm256_sub_ps(a, b);
3296	let e = _mm256_setr_ps(`-4.`, `-4.`, `-4.`, `-4.`, `-4.`, `-4.`, `-4.`, `-4.`);
3297	assert_eq_m256(r, e);
3298	}
3299
3300	#[simd_test(enable = "avx")]
3301	unsafe fn test_mm256_round_pd() {
3302	let a = _mm256_setr_pd(`1.55`, `2.2`, `3.99`, `-1.2`);
3303	let result_closest = _mm256_round_pd::<`0b0000`>(a);
3304	let result_down = _mm256_round_pd::<`0b0001`>(a);
3305	let result_up = _mm256_round_pd::<`0b0010`>(a);
3306	let expected_closest = _mm256_setr_pd(`2.`, `2.`, `4.`, `-1.`);
3307	let expected_down = _mm256_setr_pd(`1.`, `2.`, `3.`, `-2.`);
3308	let expected_up = _mm256_setr_pd(`2.`, `3.`, `4.`, `-1.`);
3309	assert_eq_m256d(result_closest, expected_closest);
3310	assert_eq_m256d(result_down, expected_down);
3311	assert_eq_m256d(result_up, expected_up);
3312	}
3313
3314	#[simd_test(enable = "avx")]
3315	unsafe fn test_mm256_floor_pd() {
3316	let a = _mm256_setr_pd(`1.55`, `2.2`, `3.99`, `-1.2`);
3317	let result_down = _mm256_floor_pd(a);
3318	let expected_down = _mm256_setr_pd(`1.`, `2.`, `3.`, `-2.`);
3319	assert_eq_m256d(result_down, expected_down);
3320	}
3321
3322	#[simd_test(enable = "avx")]
3323	unsafe fn test_mm256_ceil_pd() {
3324	let a = _mm256_setr_pd(`1.55`, `2.2`, `3.99`, `-1.2`);
3325	let result_up = _mm256_ceil_pd(a);
3326	let expected_up = _mm256_setr_pd(`2.`, `3.`, `4.`, `-1.`);
3327	assert_eq_m256d(result_up, expected_up);
3328	}
3329
3330	#[simd_test(enable = "avx")]
3331	unsafe fn test_mm256_round_ps() {
3332	let a = _mm256_setr_ps(`1.55`, `2.2`, `3.99`, `-1.2`, `1.55`, `2.2`, `3.99`, `-1.2`);
3333	let result_closest = _mm256_round_ps::<`0b0000`>(a);
3334	let result_down = _mm256_round_ps::<`0b0001`>(a);
3335	let result_up = _mm256_round_ps::<`0b0010`>(a);
3336	let expected_closest = _mm256_setr_ps(`2.`, `2.`, `4.`, `-1.`, `2.`, `2.`, `4.`, `-1.`);
3337	let expected_down = _mm256_setr_ps(`1.`, `2.`, `3.`, `-2.`, `1.`, `2.`, `3.`, `-2.`);
3338	let expected_up = _mm256_setr_ps(`2.`, `3.`, `4.`, `-1.`, `2.`, `3.`, `4.`, `-1.`);
3339	assert_eq_m256(result_closest, expected_closest);
3340	assert_eq_m256(result_down, expected_down);
3341	assert_eq_m256(result_up, expected_up);
3342	}
3343
3344	#[simd_test(enable = "avx")]
3345	unsafe fn test_mm256_floor_ps() {
3346	let a = _mm256_setr_ps(`1.55`, `2.2`, `3.99`, `-1.2`, `1.55`, `2.2`, `3.99`, `-1.2`);
3347	let result_down = _mm256_floor_ps(a);
3348	let expected_down = _mm256_setr_ps(`1.`, `2.`, `3.`, `-2.`, `1.`, `2.`, `3.`, `-2.`);
3349	assert_eq_m256(result_down, expected_down);
3350	}
3351
3352	#[simd_test(enable = "avx")]
3353	unsafe fn test_mm256_ceil_ps() {
3354	let a = _mm256_setr_ps(`1.55`, `2.2`, `3.99`, `-1.2`, `1.55`, `2.2`, `3.99`, `-1.2`);
3355	let result_up = _mm256_ceil_ps(a);
3356	let expected_up = _mm256_setr_ps(`2.`, `3.`, `4.`, `-1.`, `2.`, `3.`, `4.`, `-1.`);
3357	assert_eq_m256(result_up, expected_up);
3358	}
3359
3360	#[simd_test(enable = "avx")]
3361	unsafe fn test_mm256_sqrt_pd() {
3362	let a = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3363	let r = _mm256_sqrt_pd(a);
3364	let e = _mm256_setr_pd(`2.`, `3.`, `4.`, `5.`);
3365	assert_eq_m256d(r, e);
3366	}
3367
3368	#[simd_test(enable = "avx")]
3369	unsafe fn test_mm256_sqrt_ps() {
3370	let a = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `4.`, `9.`, `16.`, `25.`);
3371	let r = _mm256_sqrt_ps(a);
3372	let e = _mm256_setr_ps(`2.`, `3.`, `4.`, `5.`, `2.`, `3.`, `4.`, `5.`);
3373	assert_eq_m256(r, e);
3374	}
3375
3376	#[simd_test(enable = "avx")]
3377	unsafe fn test_mm256_div_ps() {
3378	let a = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `4.`, `9.`, `16.`, `25.`);
3379	let b = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3380	let r = _mm256_div_ps(a, b);
3381	let e = _mm256_setr_ps(`1.`, `3.`, `8.`, `5.`, `0.5`, `1.`, `0.25`, `0.5`);
3382	assert_eq_m256(r, e);
3383	}
3384
3385	#[simd_test(enable = "avx")]
3386	unsafe fn test_mm256_div_pd() {
3387	let a = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3388	let b = _mm256_setr_pd(`4.`, `3.`, `2.`, `5.`);
3389	let r = _mm256_div_pd(a, b);
3390	let e = _mm256_setr_pd(`1.`, `3.`, `8.`, `5.`);
3391	assert_eq_m256d(r, e);
3392	}
3393
3394	#[simd_test(enable = "avx")]
3395	unsafe fn test_mm256_blend_pd() {
3396	let a = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3397	let b = _mm256_setr_pd(`4.`, `3.`, `2.`, `5.`);
3398	let r = _mm256_blend_pd::<`0x0`>(a, b);
3399	assert_eq_m256d(r, _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`));
3400	let r = _mm256_blend_pd::<`0x3`>(a, b);
3401	assert_eq_m256d(r, _mm256_setr_pd(`4.`, `3.`, `16.`, `25.`));
3402	let r = _mm256_blend_pd::<`0xF`>(a, b);
3403	assert_eq_m256d(r, _mm256_setr_pd(`4.`, `3.`, `2.`, `5.`));
3404	}
3405
3406	#[simd_test(enable = "avx")]
3407	unsafe fn test_mm256_blend_ps() {
3408	let a = _mm256_setr_ps(`1.`, `4.`, `5.`, `8.`, `9.`, `12.`, `13.`, `16.`);
3409	let b = _mm256_setr_ps(`2.`, `3.`, `6.`, `7.`, `10.`, `11.`, `14.`, `15.`);
3410	let r = _mm256_blend_ps::<`0x0`>(a, b);
3411	assert_eq_m256(r, _mm256_setr_ps(`1.`, `4.`, `5.`, `8.`, `9.`, `12.`, `13.`, `16.`));
3412	let r = _mm256_blend_ps::<`0x3`>(a, b);
3413	assert_eq_m256(r, _mm256_setr_ps(`2.`, `3.`, `5.`, `8.`, `9.`, `12.`, `13.`, `16.`));
3414	let r = _mm256_blend_ps::<`0xF`>(a, b);
3415	assert_eq_m256(r, _mm256_setr_ps(`2.`, `3.`, `6.`, `7.`, `9.`, `12.`, `13.`, `16.`));
3416	}
3417
3418	#[simd_test(enable = "avx")]
3419	unsafe fn test_mm256_blendv_pd() {
3420	let a = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3421	let b = _mm256_setr_pd(`4.`, `3.`, `2.`, `5.`);
3422	let c = _mm256_setr_pd(`0.`, `0.`, !`0` as f64, !`0` as f64);
3423	let r = _mm256_blendv_pd(a, b, c);
3424	let e = _mm256_setr_pd(`4.`, `9.`, `2.`, `5.`);
3425	assert_eq_m256d(r, e);
3426	}
3427
3428	#[simd_test(enable = "avx")]
3429	unsafe fn test_mm256_blendv_ps() {
3430	let a = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `4.`, `9.`, `16.`, `25.`);
3431	let b = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3432	#[rustfmt::skip]
3433	let c = _mm256_setr_ps(
3434	`0.`, `0.`, `0.`, `0.`, !`0` as f32, !`0` as f32, !`0` as f32, !`0` as f32,
3435	);
3436	let r = _mm256_blendv_ps(a, b, c);
3437	let e = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `8.`, `9.`, `64.`, `50.`);
3438	assert_eq_m256(r, e);
3439	}
3440
3441	#[simd_test(enable = "avx")]
3442	unsafe fn test_mm256_dp_ps() {
3443	let a = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `4.`, `9.`, `16.`, `25.`);
3444	let b = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3445	let r = _mm256_dp_ps::<`0xFF`>(a, b);
3446	let e = _mm256_setr_ps(`200.`, `200.`, `200.`, `200.`, `2387.`, `2387.`, `2387.`, `2387.`);
3447	assert_eq_m256(r, e);
3448	}
3449
3450	#[simd_test(enable = "avx")]
3451	unsafe fn test_mm256_hadd_pd() {
3452	let a = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3453	let b = _mm256_setr_pd(`4.`, `3.`, `2.`, `5.`);
3454	let r = _mm256_hadd_pd(a, b);
3455	let e = _mm256_setr_pd(`13.`, `7.`, `41.`, `7.`);
3456	assert_eq_m256d(r, e);
3457
3458	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3459	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
3460	let r = _mm256_hadd_pd(a, b);
3461	let e = _mm256_setr_pd(`3.`, `11.`, `7.`, `15.`);
3462	assert_eq_m256d(r, e);
3463	}
3464
3465	#[simd_test(enable = "avx")]
3466	unsafe fn test_mm256_hadd_ps() {
3467	let a = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `4.`, `9.`, `16.`, `25.`);
3468	let b = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3469	let r = _mm256_hadd_ps(a, b);
3470	let e = _mm256_setr_ps(`13.`, `41.`, `7.`, `7.`, `13.`, `41.`, `17.`, `114.`);
3471	assert_eq_m256(r, e);
3472
3473	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `1.`, `2.`, `3.`, `4.`);
3474	let b = _mm256_setr_ps(`5.`, `6.`, `7.`, `8.`, `5.`, `6.`, `7.`, `8.`);
3475	let r = _mm256_hadd_ps(a, b);
3476	let e = _mm256_setr_ps(`3.`, `7.`, `11.`, `15.`, `3.`, `7.`, `11.`, `15.`);
3477	assert_eq_m256(r, e);
3478	}
3479
3480	#[simd_test(enable = "avx")]
3481	unsafe fn test_mm256_hsub_pd() {
3482	let a = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3483	let b = _mm256_setr_pd(`4.`, `3.`, `2.`, `5.`);
3484	let r = _mm256_hsub_pd(a, b);
3485	let e = _mm256_setr_pd(`-5.`, `1.`, `-9.`, `-3.`);
3486	assert_eq_m256d(r, e);
3487
3488	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3489	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
3490	let r = _mm256_hsub_pd(a, b);
3491	let e = _mm256_setr_pd(`-1.`, `-1.`, `-1.`, `-1.`);
3492	assert_eq_m256d(r, e);
3493	}
3494
3495	#[simd_test(enable = "avx")]
3496	unsafe fn test_mm256_hsub_ps() {
3497	let a = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `4.`, `9.`, `16.`, `25.`);
3498	let b = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3499	let r = _mm256_hsub_ps(a, b);
3500	let e = _mm256_setr_ps(`-5.`, `-9.`, `1.`, `-3.`, `-5.`, `-9.`, `-1.`, `14.`);
3501	assert_eq_m256(r, e);
3502
3503	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `1.`, `2.`, `3.`, `4.`);
3504	let b = _mm256_setr_ps(`5.`, `6.`, `7.`, `8.`, `5.`, `6.`, `7.`, `8.`);
3505	let r = _mm256_hsub_ps(a, b);
3506	let e = _mm256_setr_ps(`-1.`, `-1.`, `-1.`, `-1.`, `-1.`, `-1.`, `-1.`, `-1.`);
3507	assert_eq_m256(r, e);
3508	}
3509
3510	#[simd_test(enable = "avx")]
3511	unsafe fn test_mm256_xor_pd() {
3512	let a = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3513	let b = _mm256_set1_pd(`0.`);
3514	let r = _mm256_xor_pd(a, b);
3515	assert_eq_m256d(r, a);
3516	}
3517
3518	#[simd_test(enable = "avx")]
3519	unsafe fn test_mm256_xor_ps() {
3520	let a = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `4.`, `9.`, `16.`, `25.`);
3521	let b = _mm256_set1_ps(`0.`);
3522	let r = _mm256_xor_ps(a, b);
3523	assert_eq_m256(r, a);
3524	}
3525
3526	#[simd_test(enable = "avx")]
3527	unsafe fn test_mm_cmp_pd() {
3528	let a = _mm_setr_pd(`4.`, `9.`);
3529	let b = _mm_setr_pd(`4.`, `3.`);
3530	let r = _mm_cmp_pd::<_CMP_GE_OS>(a, b);
3531	assert!(get_m128d(r, `0`).is_nan());
3532	assert!(get_m128d(r, `1`).is_nan());
3533	}
3534
3535	#[simd_test(enable = "avx")]
3536	unsafe fn test_mm256_cmp_pd() {
3537	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3538	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
3539	let r = _mm256_cmp_pd::<_CMP_GE_OS>(a, b);
3540	let e = _mm256_set1_pd(`0.`);
3541	assert_eq_m256d(r, e);
3542	}
3543
3544	#[simd_test(enable = "avx")]
3545	unsafe fn test_mm_cmp_ps() {
3546	let a = _mm_setr_ps(`4.`, `3.`, `2.`, `5.`);
3547	let b = _mm_setr_ps(`4.`, `9.`, `16.`, `25.`);
3548	let r = _mm_cmp_ps::<_CMP_GE_OS>(a, b);
3549	assert!(get_m128(r, `0`).is_nan());
3550	assert_eq!(get_m128(r, `1`), `0.`);
3551	assert_eq!(get_m128(r, `2`), `0.`);
3552	assert_eq!(get_m128(r, `3`), `0.`);
3553	}
3554
3555	#[simd_test(enable = "avx")]
3556	unsafe fn test_mm256_cmp_ps() {
3557	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `1.`, `2.`, `3.`, `4.`);
3558	let b = _mm256_setr_ps(`5.`, `6.`, `7.`, `8.`, `5.`, `6.`, `7.`, `8.`);
3559	let r = _mm256_cmp_ps::<_CMP_GE_OS>(a, b);
3560	let e = _mm256_set1_ps(`0.`);
3561	assert_eq_m256(r, e);
3562	}
3563
3564	#[simd_test(enable = "avx")]
3565	unsafe fn test_mm_cmp_sd() {
3566	let a = _mm_setr_pd(`4.`, `9.`);
3567	let b = _mm_setr_pd(`4.`, `3.`);
3568	let r = _mm_cmp_sd::<_CMP_GE_OS>(a, b);
3569	assert!(get_m128d(r, `0`).is_nan());
3570	assert_eq!(get_m128d(r, `1`), `9.`);
3571	}
3572
3573	#[simd_test(enable = "avx")]
3574	unsafe fn test_mm_cmp_ss() {
3575	let a = _mm_setr_ps(`4.`, `3.`, `2.`, `5.`);
3576	let b = _mm_setr_ps(`4.`, `9.`, `16.`, `25.`);
3577	let r = _mm_cmp_ss::<_CMP_GE_OS>(a, b);
3578	assert!(get_m128(r, `0`).is_nan());
3579	assert_eq!(get_m128(r, `1`), `3.`);
3580	assert_eq!(get_m128(r, `2`), `2.`);
3581	assert_eq!(get_m128(r, `3`), `5.`);
3582	}
3583
3584	#[simd_test(enable = "avx")]
3585	unsafe fn test_mm256_cvtepi32_pd() {
3586	let a = _mm_setr_epi32(`4`, `9`, `16`, `25`);
3587	let r = _mm256_cvtepi32_pd(a);
3588	let e = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3589	assert_eq_m256d(r, e);
3590	}
3591
3592	#[simd_test(enable = "avx")]
3593	unsafe fn test_mm256_cvtepi32_ps() {
3594	let a = _mm256_setr_epi32(`4`, `9`, `16`, `25`, `4`, `9`, `16`, `25`);
3595	let r = _mm256_cvtepi32_ps(a);
3596	let e = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `4.`, `9.`, `16.`, `25.`);
3597	assert_eq_m256(r, e);
3598	}
3599
3600	#[simd_test(enable = "avx")]
3601	unsafe fn test_mm256_cvtpd_ps() {
3602	let a = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3603	let r = _mm256_cvtpd_ps(a);
3604	let e = _mm_setr_ps(`4.`, `9.`, `16.`, `25.`);
3605	assert_eq_m128(r, e);
3606	}
3607
3608	#[simd_test(enable = "avx")]
3609	unsafe fn test_mm256_cvtps_epi32() {
3610	let a = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `4.`, `9.`, `16.`, `25.`);
3611	let r = _mm256_cvtps_epi32(a);
3612	let e = _mm256_setr_epi32(`4`, `9`, `16`, `25`, `4`, `9`, `16`, `25`);
3613	assert_eq_m256i(r, e);
3614	}
3615
3616	#[simd_test(enable = "avx")]
3617	unsafe fn test_mm256_cvtps_pd() {
3618	let a = _mm_setr_ps(`4.`, `9.`, `16.`, `25.`);
3619	let r = _mm256_cvtps_pd(a);
3620	let e = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3621	assert_eq_m256d(r, e);
3622	}
3623
3624	#[simd_test(enable = "avx")]
3625	unsafe fn test_mm256_cvttpd_epi32() {
3626	let a = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3627	let r = _mm256_cvttpd_epi32(a);
3628	let e = _mm_setr_epi32(`4`, `9`, `16`, `25`);
3629	assert_eq_m128i(r, e);
3630	}
3631
3632	#[simd_test(enable = "avx")]
3633	unsafe fn test_mm256_cvtpd_epi32() {
3634	let a = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3635	let r = _mm256_cvtpd_epi32(a);
3636	let e = _mm_setr_epi32(`4`, `9`, `16`, `25`);
3637	assert_eq_m128i(r, e);
3638	}
3639
3640	#[simd_test(enable = "avx")]
3641	unsafe fn test_mm256_cvttps_epi32() {
3642	let a = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `4.`, `9.`, `16.`, `25.`);
3643	let r = _mm256_cvttps_epi32(a);
3644	let e = _mm256_setr_epi32(`4`, `9`, `16`, `25`, `4`, `9`, `16`, `25`);
3645	assert_eq_m256i(r, e);
3646	}
3647
3648	#[simd_test(enable = "avx")]
3649	unsafe fn test_mm256_extractf128_ps() {
3650	let a = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3651	let r = _mm256_extractf128_ps::<`0`>(a);
3652	let e = _mm_setr_ps(`4.`, `3.`, `2.`, `5.`);
3653	assert_eq_m128(r, e);
3654	}
3655
3656	#[simd_test(enable = "avx")]
3657	unsafe fn test_mm256_extractf128_pd() {
3658	let a = _mm256_setr_pd(`4.`, `3.`, `2.`, `5.`);
3659	let r = _mm256_extractf128_pd::<`0`>(a);
3660	let e = _mm_setr_pd(`4.`, `3.`);
3661	assert_eq_m128d(r, e);
3662	}
3663
3664	#[simd_test(enable = "avx")]
3665	unsafe fn test_mm256_extractf128_si256() {
3666	let a = _mm256_setr_epi64x(`4`, `3`, `2`, `5`);
3667	let r = _mm256_extractf128_si256::<`0`>(a);
3668	let e = _mm_setr_epi64x(`4`, `3`);
3669	assert_eq_m128i(r, e);
3670	}
3671
3672	#[simd_test(enable = "avx")]
3673	unsafe fn test_mm256_zeroall() {
3674	_mm256_zeroall();
3675	}
3676
3677	#[simd_test(enable = "avx")]
3678	unsafe fn test_mm256_zeroupper() {
3679	_mm256_zeroupper();
3680	}
3681
3682	#[simd_test(enable = "avx")]
3683	unsafe fn test_mm256_permutevar_ps() {
3684	let a = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3685	let b = _mm256_setr_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
3686	let r = _mm256_permutevar_ps(a, b);
3687	let e = _mm256_setr_ps(`3.`, `2.`, `5.`, `4.`, `9.`, `64.`, `50.`, `8.`);
3688	assert_eq_m256(r, e);
3689	}
3690
3691	#[simd_test(enable = "avx")]
3692	unsafe fn test_mm_permutevar_ps() {
3693	let a = _mm_setr_ps(`4.`, `3.`, `2.`, `5.`);
3694	let b = _mm_setr_epi32(`1`, `2`, `3`, `4`);
3695	let r = _mm_permutevar_ps(a, b);
3696	let e = _mm_setr_ps(`3.`, `2.`, `5.`, `4.`);
3697	assert_eq_m128(r, e);
3698	}
3699
3700	#[simd_test(enable = "avx")]
3701	unsafe fn test_mm256_permute_ps() {
3702	let a = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3703	let r = _mm256_permute_ps::<`0x1b`>(a);
3704	let e = _mm256_setr_ps(`5.`, `2.`, `3.`, `4.`, `50.`, `64.`, `9.`, `8.`);
3705	assert_eq_m256(r, e);
3706	}
3707
3708	#[simd_test(enable = "avx")]
3709	unsafe fn test_mm_permute_ps() {
3710	let a = _mm_setr_ps(`4.`, `3.`, `2.`, `5.`);
3711	let r = _mm_permute_ps::<`0x1b`>(a);
3712	let e = _mm_setr_ps(`5.`, `2.`, `3.`, `4.`);
3713	assert_eq_m128(r, e);
3714	}
3715
3716	#[simd_test(enable = "avx")]
3717	unsafe fn test_mm256_permutevar_pd() {
3718	let a = _mm256_setr_pd(`4.`, `3.`, `2.`, `5.`);
3719	let b = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
3720	let r = _mm256_permutevar_pd(a, b);
3721	let e = _mm256_setr_pd(`4.`, `3.`, `5.`, `2.`);
3722	assert_eq_m256d(r, e);
3723	}
3724
3725	#[simd_test(enable = "avx")]
3726	unsafe fn test_mm_permutevar_pd() {
3727	let a = _mm_setr_pd(`4.`, `3.`);
3728	let b = _mm_setr_epi64x(`3`, `0`);
3729	let r = _mm_permutevar_pd(a, b);
3730	let e = _mm_setr_pd(`3.`, `4.`);
3731	assert_eq_m128d(r, e);
3732	}
3733
3734	#[simd_test(enable = "avx")]
3735	unsafe fn test_mm256_permute_pd() {
3736	let a = _mm256_setr_pd(`4.`, `3.`, `2.`, `5.`);
3737	let r = _mm256_permute_pd::<`5`>(a);
3738	let e = _mm256_setr_pd(`3.`, `4.`, `5.`, `2.`);
3739	assert_eq_m256d(r, e);
3740	}
3741
3742	#[simd_test(enable = "avx")]
3743	unsafe fn test_mm_permute_pd() {
3744	let a = _mm_setr_pd(`4.`, `3.`);
3745	let r = _mm_permute_pd::<`1`>(a);
3746	let e = _mm_setr_pd(`3.`, `4.`);
3747	assert_eq_m128d(r, e);
3748	}
3749
3750	#[simd_test(enable = "avx")]
3751	unsafe fn test_mm256_permute2f128_ps() {
3752	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `1.`, `2.`, `3.`, `4.`);
3753	let b = _mm256_setr_ps(`5.`, `6.`, `7.`, `8.`, `5.`, `6.`, `7.`, `8.`);
3754	let r = _mm256_permute2f128_ps::<`0x13`>(a, b);
3755	let e = _mm256_setr_ps(`5.`, `6.`, `7.`, `8.`, `1.`, `2.`, `3.`, `4.`);
3756	assert_eq_m256(r, e);
3757	}
3758
3759	#[simd_test(enable = "avx")]
3760	unsafe fn test_mm256_permute2f128_pd() {
3761	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3762	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
3763	let r = _mm256_permute2f128_pd::<`0x31`>(a, b);
3764	let e = _mm256_setr_pd(`3.`, `4.`, `7.`, `8.`);
3765	assert_eq_m256d(r, e);
3766	}
3767
3768	#[simd_test(enable = "avx")]
3769	unsafe fn test_mm256_permute2f128_si256() {
3770	let a = _mm256_setr_epi32(`1`, `2`, `3`, `4`, `1`, `2`, `3`, `4`);
3771	let b = _mm256_setr_epi32(`5`, `6`, `7`, `8`, `5`, `6`, `7`, `8`);
3772	let r = _mm256_permute2f128_si256::<`0x20`>(a, b);
3773	let e = _mm256_setr_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
3774	assert_eq_m256i(r, e);
3775	}
3776
3777	#[simd_test(enable = "avx")]
3778	unsafe fn test_mm256_broadcast_ss() {
3779	let r = _mm256_broadcast_ss(&`3.`);
3780	let e = _mm256_set1_ps(`3.`);
3781	assert_eq_m256(r, e);
3782	}
3783
3784	#[simd_test(enable = "avx")]
3785	unsafe fn test_mm_broadcast_ss() {
3786	let r = _mm_broadcast_ss(&`3.`);
3787	let e = _mm_set1_ps(`3.`);
3788	assert_eq_m128(r, e);
3789	}
3790
3791	#[simd_test(enable = "avx")]
3792	unsafe fn test_mm256_broadcast_sd() {
3793	let r = _mm256_broadcast_sd(&`3.`);
3794	let e = _mm256_set1_pd(`3.`);
3795	assert_eq_m256d(r, e);
3796	}
3797
3798	#[simd_test(enable = "avx")]
3799	unsafe fn test_mm256_broadcast_ps() {
3800	let a = _mm_setr_ps(`4.`, `3.`, `2.`, `5.`);
3801	let r = _mm256_broadcast_ps(&a);
3802	let e = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `4.`, `3.`, `2.`, `5.`);
3803	assert_eq_m256(r, e);
3804	}
3805
3806	#[simd_test(enable = "avx")]
3807	unsafe fn test_mm256_broadcast_pd() {
3808	let a = _mm_setr_pd(`4.`, `3.`);
3809	let r = _mm256_broadcast_pd(&a);
3810	let e = _mm256_setr_pd(`4.`, `3.`, `4.`, `3.`);
3811	assert_eq_m256d(r, e);
3812	}
3813
3814	#[simd_test(enable = "avx")]
3815	unsafe fn test_mm256_insertf128_ps() {
3816	let a = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3817	let b = _mm_setr_ps(`4.`, `9.`, `16.`, `25.`);
3818	let r = _mm256_insertf128_ps::<`0`>(a, b);
3819	let e = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `8.`, `9.`, `64.`, `50.`);
3820	assert_eq_m256(r, e);
3821	}
3822
3823	#[simd_test(enable = "avx")]
3824	unsafe fn test_mm256_insertf128_pd() {
3825	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3826	let b = _mm_setr_pd(`5.`, `6.`);
3827	let r = _mm256_insertf128_pd::<`0`>(a, b);
3828	let e = _mm256_setr_pd(`5.`, `6.`, `3.`, `4.`);
3829	assert_eq_m256d(r, e);
3830	}
3831
3832	#[simd_test(enable = "avx")]
3833	unsafe fn test_mm256_insertf128_si256() {
3834	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
3835	let b = _mm_setr_epi64x(`5`, `6`);
3836	let r = _mm256_insertf128_si256::<`0`>(a, b);
3837	let e = _mm256_setr_epi64x(`5`, `6`, `3`, `4`);
3838	assert_eq_m256i(r, e);
3839	}
3840
3841	#[simd_test(enable = "avx")]
3842	unsafe fn test_mm256_insert_epi8() {
3843	#[rustfmt::skip]
3844	let a = _mm256_setr_epi8(
3845	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
3846	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
3847	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
3848	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
3849	);
3850	let r = _mm256_insert_epi8::<`31`>(a, `0`);
3851	#[rustfmt::skip]
3852	let e = _mm256_setr_epi8(
3853	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
3854	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
3855	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
3856	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `0`,
3857	);
3858	assert_eq_m256i(r, e);
3859	}
3860
3861	#[simd_test(enable = "avx")]
3862	unsafe fn test_mm256_insert_epi16() {
3863	#[rustfmt::skip]
3864	let a = _mm256_setr_epi16(
3865	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
3866	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
3867	);
3868	let r = _mm256_insert_epi16::<`15`>(a, `0`);
3869	#[rustfmt::skip]
3870	let e = _mm256_setr_epi16(
3871	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
3872	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `0`,
3873	);
3874	assert_eq_m256i(r, e);
3875	}
3876
3877	#[simd_test(enable = "avx")]
3878	unsafe fn test_mm256_insert_epi32() {
3879	let a = _mm256_setr_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
3880	let r = _mm256_insert_epi32::<`7`>(a, `0`);
3881	let e = _mm256_setr_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `0`);
3882	assert_eq_m256i(r, e);
3883	}
3884
3885	#[simd_test(enable = "avx")]
3886	unsafe fn test_mm256_load_pd() {
3887	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3888	let p = &a as *const _ as *const f64;
3889	let r = _mm256_load_pd(p);
3890	let e = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3891	assert_eq_m256d(r, e);
3892	}
3893
3894	#[simd_test(enable = "avx")]
3895	unsafe fn test_mm256_store_pd() {
3896	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3897	let mut r = _mm256_undefined_pd();
3898	_mm256_store_pd(&mut r as *mut _ as *mut f64, a);
3899	assert_eq_m256d(r, a);
3900	}
3901
3902	#[simd_test(enable = "avx")]
3903	unsafe fn test_mm256_load_ps() {
3904	let a = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3905	let p = &a as *const _ as *const f32;
3906	let r = _mm256_load_ps(p);
3907	let e = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3908	assert_eq_m256(r, e);
3909	}
3910
3911	#[simd_test(enable = "avx")]
3912	unsafe fn test_mm256_store_ps() {
3913	let a = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3914	let mut r = _mm256_undefined_ps();
3915	_mm256_store_ps(&mut r as *mut _ as *mut f32, a);
3916	assert_eq_m256(r, a);
3917	}
3918
3919	#[simd_test(enable = "avx")]
3920	unsafe fn test_mm256_loadu_pd() {
3921	let a = &[`1.0f64`, `2.`, `3.`, `4.`];
3922	let p = a.as_ptr();
3923	let r = _mm256_loadu_pd(black_box(p));
3924	let e = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3925	assert_eq_m256d(r, e);
3926	}
3927
3928	#[simd_test(enable = "avx")]
3929	unsafe fn test_mm256_storeu_pd() {
3930	let a = _mm256_set1_pd(`9.`);
3931	let mut r = _mm256_undefined_pd();
3932	_mm256_storeu_pd(&mut r as *mut _ as *mut f64, a);
3933	assert_eq_m256d(r, a);
3934	}
3935
3936	#[simd_test(enable = "avx")]
3937	unsafe fn test_mm256_loadu_ps() {
3938	let a = &[`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`];
3939	let p = a.as_ptr();
3940	let r = _mm256_loadu_ps(black_box(p));
3941	let e = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3942	assert_eq_m256(r, e);
3943	}
3944
3945	#[simd_test(enable = "avx")]
3946	unsafe fn test_mm256_storeu_ps() {
3947	let a = _mm256_set1_ps(`9.`);
3948	let mut r = _mm256_undefined_ps();
3949	_mm256_storeu_ps(&mut r as *mut _ as *mut f32, a);
3950	assert_eq_m256(r, a);
3951	}
3952
3953	#[simd_test(enable = "avx")]
3954	unsafe fn test_mm256_load_si256() {
3955	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
3956	let p = &a as *const _;
3957	let r = _mm256_load_si256(p);
3958	let e = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
3959	assert_eq_m256i(r, e);
3960	}
3961
3962	#[simd_test(enable = "avx")]
3963	unsafe fn test_mm256_store_si256() {
3964	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
3965	let mut r = _mm256_undefined_si256();
3966	_mm256_store_si256(&mut r as *mut _, a);
3967	assert_eq_m256i(r, a);
3968	}
3969
3970	#[simd_test(enable = "avx")]
3971	unsafe fn test_mm256_loadu_si256() {
3972	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
3973	let p = &a as *const _;
3974	let r = _mm256_loadu_si256(black_box(p));
3975	let e = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
3976	assert_eq_m256i(r, e);
3977	}
3978
3979	#[simd_test(enable = "avx")]
3980	unsafe fn test_mm256_storeu_si256() {
3981	let a = _mm256_set1_epi8(`9`);
3982	let mut r = _mm256_undefined_si256();
3983	_mm256_storeu_si256(&mut r as *mut _, a);
3984	assert_eq_m256i(r, a);
3985	}
3986
3987	#[simd_test(enable = "avx")]
3988	unsafe fn test_mm256_maskload_pd() {
3989	let a = &[`1.0f64`, `2.`, `3.`, `4.`];
3990	let p = a.as_ptr();
3991	let mask = _mm256_setr_epi64x(`0`, !`0`, `0`, !`0`);
3992	let r = _mm256_maskload_pd(black_box(p), mask);
3993	let e = _mm256_setr_pd(`0.`, `2.`, `0.`, `4.`);
3994	assert_eq_m256d(r, e);
3995	}
3996
3997	#[simd_test(enable = "avx")]
3998	unsafe fn test_mm256_maskstore_pd() {
3999	let mut r = _mm256_set1_pd(`0.`);
4000	let mask = _mm256_setr_epi64x(`0`, !`0`, `0`, !`0`);
4001	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4002	_mm256_maskstore_pd(&mut r as *mut _ as *mut f64, mask, a);
4003	let e = _mm256_setr_pd(`0.`, `2.`, `0.`, `4.`);
4004	assert_eq_m256d(r, e);
4005	}
4006
4007	#[simd_test(enable = "avx")]
4008	unsafe fn test_mm_maskload_pd() {
4009	let a = &[`1.0f64`, `2.`];
4010	let p = a.as_ptr();
4011	let mask = _mm_setr_epi64x(`0`, !`0`);
4012	let r = _mm_maskload_pd(black_box(p), mask);
4013	let e = _mm_setr_pd(`0.`, `2.`);
4014	assert_eq_m128d(r, e);
4015	}
4016
4017	#[simd_test(enable = "avx")]
4018	unsafe fn test_mm_maskstore_pd() {
4019	let mut r = _mm_set1_pd(`0.`);
4020	let mask = _mm_setr_epi64x(`0`, !`0`);
4021	let a = _mm_setr_pd(`1.`, `2.`);
4022	_mm_maskstore_pd(&mut r as *mut _ as *mut f64, mask, a);
4023	let e = _mm_setr_pd(`0.`, `2.`);
4024	assert_eq_m128d(r, e);
4025	}
4026
4027	#[simd_test(enable = "avx")]
4028	unsafe fn test_mm256_maskload_ps() {
4029	let a = &[`1.0f32`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`];
4030	let p = a.as_ptr();
4031	let mask = _mm256_setr_epi32(`0`, !`0`, `0`, !`0`, `0`, !`0`, `0`, !`0`);
4032	let r = _mm256_maskload_ps(black_box(p), mask);
4033	let e = _mm256_setr_ps(`0.`, `2.`, `0.`, `4.`, `0.`, `6.`, `0.`, `8.`);
4034	assert_eq_m256(r, e);
4035	}
4036
4037	#[simd_test(enable = "avx")]
4038	unsafe fn test_mm256_maskstore_ps() {
4039	let mut r = _mm256_set1_ps(`0.`);
4040	let mask = _mm256_setr_epi32(`0`, !`0`, `0`, !`0`, `0`, !`0`, `0`, !`0`);
4041	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4042	_mm256_maskstore_ps(&mut r as *mut _ as *mut f32, mask, a);
4043	let e = _mm256_setr_ps(`0.`, `2.`, `0.`, `4.`, `0.`, `6.`, `0.`, `8.`);
4044	assert_eq_m256(r, e);
4045	}
4046
4047	#[simd_test(enable = "avx")]
4048	unsafe fn test_mm_maskload_ps() {
4049	let a = &[`1.0f32`, `2.`, `3.`, `4.`];
4050	let p = a.as_ptr();
4051	let mask = _mm_setr_epi32(`0`, !`0`, `0`, !`0`);
4052	let r = _mm_maskload_ps(black_box(p), mask);
4053	let e = _mm_setr_ps(`0.`, `2.`, `0.`, `4.`);
4054	assert_eq_m128(r, e);
4055	}
4056
4057	#[simd_test(enable = "avx")]
4058	unsafe fn test_mm_maskstore_ps() {
4059	let mut r = _mm_set1_ps(`0.`);
4060	let mask = _mm_setr_epi32(`0`, !`0`, `0`, !`0`);
4061	let a = _mm_setr_ps(`1.`, `2.`, `3.`, `4.`);
4062	_mm_maskstore_ps(&mut r as *mut _ as *mut f32, mask, a);
4063	let e = _mm_setr_ps(`0.`, `2.`, `0.`, `4.`);
4064	assert_eq_m128(r, e);
4065	}
4066
4067	#[simd_test(enable = "avx")]
4068	unsafe fn test_mm256_movehdup_ps() {
4069	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4070	let r = _mm256_movehdup_ps(a);
4071	let e = _mm256_setr_ps(`2.`, `2.`, `4.`, `4.`, `6.`, `6.`, `8.`, `8.`);
4072	assert_eq_m256(r, e);
4073	}
4074
4075	#[simd_test(enable = "avx")]
4076	unsafe fn test_mm256_moveldup_ps() {
4077	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4078	let r = _mm256_moveldup_ps(a);
4079	let e = _mm256_setr_ps(`1.`, `1.`, `3.`, `3.`, `5.`, `5.`, `7.`, `7.`);
4080	assert_eq_m256(r, e);
4081	}
4082
4083	#[simd_test(enable = "avx")]
4084	unsafe fn test_mm256_movedup_pd() {
4085	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4086	let r = _mm256_movedup_pd(a);
4087	let e = _mm256_setr_pd(`1.`, `1.`, `3.`, `3.`);
4088	assert_eq_m256d(r, e);
4089	}
4090
4091	#[simd_test(enable = "avx")]
4092	unsafe fn test_mm256_lddqu_si256() {
4093	#[rustfmt::skip]
4094	let a = _mm256_setr_epi8(
4095	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4096	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4097	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4098	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
4099	);
4100	let p = &a as *const _;
4101	let r = _mm256_lddqu_si256(black_box(p));
4102	#[rustfmt::skip]
4103	let e = _mm256_setr_epi8(
4104	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4105	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4106	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4107	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
4108	);
4109	assert_eq_m256i(r, e);
4110	}
4111
4112	#[simd_test(enable = "avx")]
4113	unsafe fn test_mm256_stream_si256() {
4114	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
4115	let mut r = _mm256_undefined_si256();
4116	_mm256_stream_si256(&mut r as *mut _, a);
4117	assert_eq_m256i(r, a);
4118	}
4119
4120	#[simd_test(enable = "avx")]
4121	unsafe fn test_mm256_stream_pd() {
4122	#[repr(align(`32`))]
4123	struct Memory {
4124	pub data: [f64; `4`],
4125	}
4126	let a = _mm256_set1_pd(`7.0`);
4127	let mut mem = Memory { data: [`-1.0`; `4`] };
4128
4129	_mm256_stream_pd(&mut mem.data[`0`] as *mut f64, a);
4130	for i in `0`..`4` {
4131	assert_eq!(mem.data[i], get_m256d(a, i));
4132	}
4133	}
4134
4135	#[simd_test(enable = "avx")]
4136	unsafe fn test_mm256_stream_ps() {
4137	#[repr(align(`32`))]
4138	struct Memory {
4139	pub data: [f32; `8`],
4140	}
4141	let a = _mm256_set1_ps(`7.0`);
4142	let mut mem = Memory { data: [`-1.0`; `8`] };
4143
4144	_mm256_stream_ps(&mut mem.data[`0`] as *mut f32, a);
4145	for i in `0`..`8` {
4146	assert_eq!(mem.data[i], get_m256(a, i));
4147	}
4148	}
4149
4150	#[simd_test(enable = "avx")]
4151	unsafe fn test_mm256_rcp_ps() {
4152	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4153	let r = _mm256_rcp_ps(a);
4154	#[rustfmt::skip]
4155	let e = _mm256_setr_ps(
4156	`0.99975586`, `0.49987793`, `0.33325195`, `0.24993896`,
4157	`0.19995117`, `0.16662598`, `0.14282227`, `0.12496948`,
4158	);
4159	let rel_err = `0.00048828125`;
4160	for i in `0`..`8` {
4161	assert_approx_eq!(get_m256(r, i), get_m256(e, i), `2.` * rel_err);
4162	}
4163	}
4164
4165	#[simd_test(enable = "avx")]
4166	unsafe fn test_mm256_rsqrt_ps() {
4167	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4168	let r = _mm256_rsqrt_ps(a);
4169	#[rustfmt::skip]
4170	let e = _mm256_setr_ps(
4171	`0.99975586`, `0.7069092`, `0.5772705`, `0.49987793`,
4172	`0.44714355`, `0.40820313`, `0.3779297`, `0.3534546`,
4173	);
4174	let rel_err = `0.00048828125`;
4175	for i in `0`..`8` {
4176	assert_approx_eq!(get_m256(r, i), get_m256(e, i), `2.` * rel_err);
4177	}
4178	}
4179
4180	#[simd_test(enable = "avx")]
4181	unsafe fn test_mm256_unpackhi_pd() {
4182	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4183	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
4184	let r = _mm256_unpackhi_pd(a, b);
4185	let e = _mm256_setr_pd(`2.`, `6.`, `4.`, `8.`);
4186	assert_eq_m256d(r, e);
4187	}
4188
4189	#[simd_test(enable = "avx")]
4190	unsafe fn test_mm256_unpackhi_ps() {
4191	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4192	let b = _mm256_setr_ps(`9.`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
4193	let r = _mm256_unpackhi_ps(a, b);
4194	let e = _mm256_setr_ps(`3.`, `11.`, `4.`, `12.`, `7.`, `15.`, `8.`, `16.`);
4195	assert_eq_m256(r, e);
4196	}
4197
4198	#[simd_test(enable = "avx")]
4199	unsafe fn test_mm256_unpacklo_pd() {
4200	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4201	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
4202	let r = _mm256_unpacklo_pd(a, b);
4203	let e = _mm256_setr_pd(`1.`, `5.`, `3.`, `7.`);
4204	assert_eq_m256d(r, e);
4205	}
4206
4207	#[simd_test(enable = "avx")]
4208	unsafe fn test_mm256_unpacklo_ps() {
4209	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4210	let b = _mm256_setr_ps(`9.`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
4211	let r = _mm256_unpacklo_ps(a, b);
4212	let e = _mm256_setr_ps(`1.`, `9.`, `2.`, `10.`, `5.`, `13.`, `6.`, `14.`);
4213	assert_eq_m256(r, e);
4214	}
4215
4216	#[simd_test(enable = "avx")]
4217	unsafe fn test_mm256_testz_si256() {
4218	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
4219	let b = _mm256_setr_epi64x(`5`, `6`, `7`, `8`);
4220	let r = _mm256_testz_si256(a, b);
4221	assert_eq!(r, `0`);
4222	let b = _mm256_set1_epi64x(`0`);
4223	let r = _mm256_testz_si256(a, b);
4224	assert_eq!(r, `1`);
4225	}
4226
4227	#[simd_test(enable = "avx")]
4228	unsafe fn test_mm256_testc_si256() {
4229	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
4230	let b = _mm256_setr_epi64x(`5`, `6`, `7`, `8`);
4231	let r = _mm256_testc_si256(a, b);
4232	assert_eq!(r, `0`);
4233	let b = _mm256_set1_epi64x(`0`);
4234	let r = _mm256_testc_si256(a, b);
4235	assert_eq!(r, `1`);
4236	}
4237
4238	#[simd_test(enable = "avx")]
4239	unsafe fn test_mm256_testnzc_si256() {
4240	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
4241	let b = _mm256_setr_epi64x(`5`, `6`, `7`, `8`);
4242	let r = _mm256_testnzc_si256(a, b);
4243	assert_eq!(r, `1`);
4244	let a = _mm256_setr_epi64x(`0`, `0`, `0`, `0`);
4245	let b = _mm256_setr_epi64x(`0`, `0`, `0`, `0`);
4246	let r = _mm256_testnzc_si256(a, b);
4247	assert_eq!(r, `0`);
4248	}
4249
4250	#[simd_test(enable = "avx")]
4251	unsafe fn test_mm256_testz_pd() {
4252	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4253	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
4254	let r = _mm256_testz_pd(a, b);
4255	assert_eq!(r, `1`);
4256	let a = _mm256_set1_pd(`-1.`);
4257	let r = _mm256_testz_pd(a, a);
4258	assert_eq!(r, `0`);
4259	}
4260
4261	#[simd_test(enable = "avx")]
4262	unsafe fn test_mm256_testc_pd() {
4263	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4264	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
4265	let r = _mm256_testc_pd(a, b);
4266	assert_eq!(r, `1`);
4267	let a = _mm256_set1_pd(`1.`);
4268	let b = _mm256_set1_pd(`-1.`);
4269	let r = _mm256_testc_pd(a, b);
4270	assert_eq!(r, `0`);
4271	}
4272
4273	#[simd_test(enable = "avx")]
4274	unsafe fn test_mm256_testnzc_pd() {
4275	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4276	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
4277	let r = _mm256_testnzc_pd(a, b);
4278	assert_eq!(r, `0`);
4279	let a = _mm256_setr_pd(`1.`, `-1.`, `-1.`, `-1.`);
4280	let b = _mm256_setr_pd(`-1.`, `-1.`, `1.`, `1.`);
4281	let r = _mm256_testnzc_pd(a, b);
4282	assert_eq!(r, `1`);
4283	}
4284
4285	#[simd_test(enable = "avx")]
4286	unsafe fn test_mm_testz_pd() {
4287	let a = _mm_setr_pd(`1.`, `2.`);
4288	let b = _mm_setr_pd(`5.`, `6.`);
4289	let r = _mm_testz_pd(a, b);
4290	assert_eq!(r, `1`);
4291	let a = _mm_set1_pd(`-1.`);
4292	let r = _mm_testz_pd(a, a);
4293	assert_eq!(r, `0`);
4294	}
4295
4296	#[simd_test(enable = "avx")]
4297	unsafe fn test_mm_testc_pd() {
4298	let a = _mm_setr_pd(`1.`, `2.`);
4299	let b = _mm_setr_pd(`5.`, `6.`);
4300	let r = _mm_testc_pd(a, b);
4301	assert_eq!(r, `1`);
4302	let a = _mm_set1_pd(`1.`);
4303	let b = _mm_set1_pd(`-1.`);
4304	let r = _mm_testc_pd(a, b);
4305	assert_eq!(r, `0`);
4306	}
4307
4308	#[simd_test(enable = "avx")]
4309	unsafe fn test_mm_testnzc_pd() {
4310	let a = _mm_setr_pd(`1.`, `2.`);
4311	let b = _mm_setr_pd(`5.`, `6.`);
4312	let r = _mm_testnzc_pd(a, b);
4313	assert_eq!(r, `0`);
4314	let a = _mm_setr_pd(`1.`, `-1.`);
4315	let b = _mm_setr_pd(`-1.`, `-1.`);
4316	let r = _mm_testnzc_pd(a, b);
4317	assert_eq!(r, `1`);
4318	}
4319
4320	#[simd_test(enable = "avx")]
4321	unsafe fn test_mm256_testz_ps() {
4322	let a = _mm256_set1_ps(`1.`);
4323	let r = _mm256_testz_ps(a, a);
4324	assert_eq!(r, `1`);
4325	let a = _mm256_set1_ps(`-1.`);
4326	let r = _mm256_testz_ps(a, a);
4327	assert_eq!(r, `0`);
4328	}
4329
4330	#[simd_test(enable = "avx")]
4331	unsafe fn test_mm256_testc_ps() {
4332	let a = _mm256_set1_ps(`1.`);
4333	let r = _mm256_testc_ps(a, a);
4334	assert_eq!(r, `1`);
4335	let b = _mm256_set1_ps(`-1.`);
4336	let r = _mm256_testc_ps(a, b);
4337	assert_eq!(r, `0`);
4338	}
4339
4340	#[simd_test(enable = "avx")]
4341	unsafe fn test_mm256_testnzc_ps() {
4342	let a = _mm256_set1_ps(`1.`);
4343	let r = _mm256_testnzc_ps(a, a);
4344	assert_eq!(r, `0`);
4345	let a = _mm256_setr_ps(`1.`, `-1.`, `-1.`, `-1.`, `-1.`, `-1.`, `-1.`, `-1.`);
4346	let b = _mm256_setr_ps(`-1.`, `-1.`, `1.`, `1.`, `1.`, `1.`, `1.`, `1.`);
4347	let r = _mm256_testnzc_ps(a, b);
4348	assert_eq!(r, `1`);
4349	}
4350
4351	#[simd_test(enable = "avx")]
4352	unsafe fn test_mm_testz_ps() {
4353	let a = _mm_set1_ps(`1.`);
4354	let r = _mm_testz_ps(a, a);
4355	assert_eq!(r, `1`);
4356	let a = _mm_set1_ps(`-1.`);
4357	let r = _mm_testz_ps(a, a);
4358	assert_eq!(r, `0`);
4359	}
4360
4361	#[simd_test(enable = "avx")]
4362	unsafe fn test_mm_testc_ps() {
4363	let a = _mm_set1_ps(`1.`);
4364	let r = _mm_testc_ps(a, a);
4365	assert_eq!(r, `1`);
4366	let b = _mm_set1_ps(`-1.`);
4367	let r = _mm_testc_ps(a, b);
4368	assert_eq!(r, `0`);
4369	}
4370
4371	#[simd_test(enable = "avx")]
4372	unsafe fn test_mm_testnzc_ps() {
4373	let a = _mm_set1_ps(`1.`);
4374	let r = _mm_testnzc_ps(a, a);
4375	assert_eq!(r, `0`);
4376	let a = _mm_setr_ps(`1.`, `-1.`, `-1.`, `-1.`);
4377	let b = _mm_setr_ps(`-1.`, `-1.`, `1.`, `1.`);
4378	let r = _mm_testnzc_ps(a, b);
4379	assert_eq!(r, `1`);
4380	}
4381
4382	#[simd_test(enable = "avx")]
4383	unsafe fn test_mm256_movemask_pd() {
4384	let a = _mm256_setr_pd(`1.`, `-2.`, `3.`, `-4.`);
4385	let r = _mm256_movemask_pd(a);
4386	assert_eq!(r, `0xA`);
4387	}
4388
4389	#[simd_test(enable = "avx")]
4390	unsafe fn test_mm256_movemask_ps() {
4391	let a = _mm256_setr_ps(`1.`, `-2.`, `3.`, `-4.`, `1.`, `-2.`, `3.`, `-4.`);
4392	let r = _mm256_movemask_ps(a);
4393	assert_eq!(r, `0xAA`);
4394	}
4395
4396	#[simd_test(enable = "avx")]
4397	unsafe fn test_mm256_setzero_pd() {
4398	let r = _mm256_setzero_pd();
4399	assert_eq_m256d(r, _mm256_set1_pd(`0.`));
4400	}
4401
4402	#[simd_test(enable = "avx")]
4403	unsafe fn test_mm256_setzero_ps() {
4404	let r = _mm256_setzero_ps();
4405	assert_eq_m256(r, _mm256_set1_ps(`0.`));
4406	}
4407
4408	#[simd_test(enable = "avx")]
4409	unsafe fn test_mm256_setzero_si256() {
4410	let r = _mm256_setzero_si256();
4411	assert_eq_m256i(r, _mm256_set1_epi8(`0`));
4412	}
4413
4414	#[simd_test(enable = "avx")]
4415	unsafe fn test_mm256_set_pd() {
4416	let r = _mm256_set_pd(`1.`, `2.`, `3.`, `4.`);
4417	assert_eq_m256d(r, _mm256_setr_pd(`4.`, `3.`, `2.`, `1.`));
4418	}
4419
4420	#[simd_test(enable = "avx")]
4421	unsafe fn test_mm256_set_ps() {
4422	let r = _mm256_set_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4423	assert_eq_m256(r, _mm256_setr_ps(`8.`, `7.`, `6.`, `5.`, `4.`, `3.`, `2.`, `1.`));
4424	}
4425
4426	#[simd_test(enable = "avx")]
4427	unsafe fn test_mm256_set_epi8() {
4428	#[rustfmt::skip]
4429	let r = _mm256_set_epi8(
4430	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4431	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4432	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4433	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
4434	);
4435	#[rustfmt::skip]
4436	let e = _mm256_setr_epi8(
4437	`32`, `31`, `30`, `29`, `28`, `27`, `26`, `25`,
4438	`24`, `23`, `22`, `21`, `20`, `19`, `18`, `17`,
4439	`16`, `15`, `14`, `13`, `12`, `11`, `10`, `9`,
4440	`8`, `7`, `6`, `5`, `4`, `3`, `2`, `1`
4441	);
4442	assert_eq_m256i(r, e);
4443	}
4444
4445	#[simd_test(enable = "avx")]
4446	unsafe fn test_mm256_set_epi16() {
4447	#[rustfmt::skip]
4448	let r = _mm256_set_epi16(
4449	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4450	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4451	);
4452	#[rustfmt::skip]
4453	let e = _mm256_setr_epi16(
4454	`16`, `15`, `14`, `13`, `12`, `11`, `10`, `9`, `8`,
4455	`7`, `6`, `5`, `4`, `3`, `2`, `1`,
4456	);
4457	assert_eq_m256i(r, e);
4458	}
4459
4460	#[simd_test(enable = "avx")]
4461	unsafe fn test_mm256_set_epi32() {
4462	let r = _mm256_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
4463	assert_eq_m256i(r, _mm256_setr_epi32(`8`, `7`, `6`, `5`, `4`, `3`, `2`, `1`));
4464	}
4465
4466	#[simd_test(enable = "avx")]
4467	unsafe fn test_mm256_set_epi64x() {
4468	let r = _mm256_set_epi64x(`1`, `2`, `3`, `4`);
4469	assert_eq_m256i(r, _mm256_setr_epi64x(`4`, `3`, `2`, `1`));
4470	}
4471
4472	#[simd_test(enable = "avx")]
4473	unsafe fn test_mm256_setr_pd() {
4474	let r = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4475	assert_eq_m256d(r, _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`));
4476	}
4477
4478	#[simd_test(enable = "avx")]
4479	unsafe fn test_mm256_setr_ps() {
4480	let r = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4481	assert_eq_m256(r, _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`));
4482	}
4483
4484	#[simd_test(enable = "avx")]
4485	unsafe fn test_mm256_setr_epi8() {
4486	#[rustfmt::skip]
4487	let r = _mm256_setr_epi8(
4488	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4489	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4490	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4491	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
4492	);
4493	#[rustfmt::skip]
4494	let e = _mm256_setr_epi8(
4495	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4496	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4497	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4498	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`
4499	);
4500
4501	assert_eq_m256i(r, e);
4502	}
4503
4504	#[simd_test(enable = "avx")]
4505	unsafe fn test_mm256_setr_epi16() {
4506	#[rustfmt::skip]
4507	let r = _mm256_setr_epi16(
4508	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4509	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4510	);
4511	#[rustfmt::skip]
4512	let e = _mm256_setr_epi16(
4513	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4514	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4515	);
4516	assert_eq_m256i(r, e);
4517	}
4518
4519	#[simd_test(enable = "avx")]
4520	unsafe fn test_mm256_setr_epi32() {
4521	let r = _mm256_setr_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
4522	assert_eq_m256i(r, _mm256_setr_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`));
4523	}
4524
4525	#[simd_test(enable = "avx")]
4526	unsafe fn test_mm256_setr_epi64x() {
4527	let r = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
4528	assert_eq_m256i(r, _mm256_setr_epi64x(`1`, `2`, `3`, `4`));
4529	}
4530
4531	#[simd_test(enable = "avx")]
4532	unsafe fn test_mm256_set1_pd() {
4533	let r = _mm256_set1_pd(`1.`);
4534	assert_eq_m256d(r, _mm256_set1_pd(`1.`));
4535	}
4536
4537	#[simd_test(enable = "avx")]
4538	unsafe fn test_mm256_set1_ps() {
4539	let r = _mm256_set1_ps(`1.`);
4540	assert_eq_m256(r, _mm256_set1_ps(`1.`));
4541	}
4542
4543	#[simd_test(enable = "avx")]
4544	unsafe fn test_mm256_set1_epi8() {
4545	let r = _mm256_set1_epi8(`1`);
4546	assert_eq_m256i(r, _mm256_set1_epi8(`1`));
4547	}
4548
4549	#[simd_test(enable = "avx")]
4550	unsafe fn test_mm256_set1_epi16() {
4551	let r = _mm256_set1_epi16(`1`);
4552	assert_eq_m256i(r, _mm256_set1_epi16(`1`));
4553	}
4554
4555	#[simd_test(enable = "avx")]
4556	unsafe fn test_mm256_set1_epi32() {
4557	let r = _mm256_set1_epi32(`1`);
4558	assert_eq_m256i(r, _mm256_set1_epi32(`1`));
4559	}
4560
4561	#[simd_test(enable = "avx")]
4562	unsafe fn test_mm256_set1_epi64x() {
4563	let r = _mm256_set1_epi64x(`1`);
4564	assert_eq_m256i(r, _mm256_set1_epi64x(`1`));
4565	}
4566
4567	#[simd_test(enable = "avx")]
4568	unsafe fn test_mm256_castpd_ps() {
4569	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4570	let r = _mm256_castpd_ps(a);
4571	let e = _mm256_setr_ps(`0.`, `1.875`, `0.`, `2.`, `0.`, `2.125`, `0.`, `2.25`);
4572	assert_eq_m256(r, e);
4573	}
4574
4575	#[simd_test(enable = "avx")]
4576	unsafe fn test_mm256_castps_pd() {
4577	let a = _mm256_setr_ps(`0.`, `1.875`, `0.`, `2.`, `0.`, `2.125`, `0.`, `2.25`);
4578	let r = _mm256_castps_pd(a);
4579	let e = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4580	assert_eq_m256d(r, e);
4581	}
4582
4583	#[simd_test(enable = "avx")]
4584	unsafe fn test_mm256_castps_si256() {
4585	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4586	let r = _mm256_castps_si256(a);
4587	#[rustfmt::skip]
4588	let e = _mm256_setr_epi8(
4589	`0`, `0`, `-128`, `63`, `0`, `0`, `0`, `64`,
4590	`0`, `0`, `64`, `64`, `0`, `0`, `-128`, `64`,
4591	`0`, `0`, `-96`, `64`, `0`, `0`, `-64`, `64`,
4592	`0`, `0`, `-32`, `64`, `0`, `0`, `0`, `65`,
4593	);
4594	assert_eq_m256i(r, e);
4595	}
4596
4597	#[simd_test(enable = "avx")]
4598	unsafe fn test_mm256_castsi256_ps() {
4599	#[rustfmt::skip]
4600	let a = _mm256_setr_epi8(
4601	`0`, `0`, `-128`, `63`, `0`, `0`, `0`, `64`,
4602	`0`, `0`, `64`, `64`, `0`, `0`, `-128`, `64`,
4603	`0`, `0`, `-96`, `64`, `0`, `0`, `-64`, `64`,
4604	`0`, `0`, `-32`, `64`, `0`, `0`, `0`, `65`,
4605	);
4606	let r = _mm256_castsi256_ps(a);
4607	let e = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4608	assert_eq_m256(r, e);
4609	}
4610
4611	#[simd_test(enable = "avx")]
4612	unsafe fn test_mm256_castpd_si256() {
4613	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4614	let r = _mm256_castpd_si256(a);
4615	assert_eq_m256d(transmute(r), a);
4616	}
4617
4618	#[simd_test(enable = "avx")]
4619	unsafe fn test_mm256_castsi256_pd() {
4620	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
4621	let r = _mm256_castsi256_pd(a);
4622	assert_eq_m256d(r, transmute(a));
4623	}
4624
4625	#[simd_test(enable = "avx")]
4626	unsafe fn test_mm256_castps256_ps128() {
4627	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4628	let r = _mm256_castps256_ps128(a);
4629	assert_eq_m128(r, _mm_setr_ps(`1.`, `2.`, `3.`, `4.`));
4630	}
4631
4632	#[simd_test(enable = "avx")]
4633	unsafe fn test_mm256_castpd256_pd128() {
4634	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4635	let r = _mm256_castpd256_pd128(a);
4636	assert_eq_m128d(r, _mm_setr_pd(`1.`, `2.`));
4637	}
4638
4639	#[simd_test(enable = "avx")]
4640	unsafe fn test_mm256_castsi256_si128() {
4641	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
4642	let r = _mm256_castsi256_si128(a);
4643	assert_eq_m128i(r, _mm_setr_epi64x(`1`, `2`));
4644	}
4645
4646	#[simd_test(enable = "avx")]
4647	unsafe fn test_mm256_zextps128_ps256() {
4648	let a = _mm_setr_ps(`1.`, `2.`, `3.`, `4.`);
4649	let r = _mm256_zextps128_ps256(a);
4650	let e = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `0.`, `0.`, `0.`, `0.`);
4651	assert_eq_m256(r, e);
4652	}
4653
4654	#[simd_test(enable = "avx")]
4655	unsafe fn test_mm256_zextsi128_si256() {
4656	let a = _mm_setr_epi64x(`1`, `2`);
4657	let r = _mm256_zextsi128_si256(a);
4658	let e = _mm256_setr_epi64x(`1`, `2`, `0`, `0`);
4659	assert_eq_m256i(r, e);
4660	}
4661
4662	#[simd_test(enable = "avx")]
4663	unsafe fn test_mm256_zextpd128_pd256() {
4664	let a = _mm_setr_pd(`1.`, `2.`);
4665	let r = _mm256_zextpd128_pd256(a);
4666	let e = _mm256_setr_pd(`1.`, `2.`, `0.`, `0.`);
4667	assert_eq_m256d(r, e);
4668	}
4669
4670	#[simd_test(enable = "avx")]
4671	unsafe fn test_mm256_set_m128() {
4672	let hi = _mm_setr_ps(`5.`, `6.`, `7.`, `8.`);
4673	let lo = _mm_setr_ps(`1.`, `2.`, `3.`, `4.`);
4674	let r = _mm256_set_m128(hi, lo);
4675	let e = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4676	assert_eq_m256(r, e);
4677	}
4678
4679	#[simd_test(enable = "avx")]
4680	unsafe fn test_mm256_set_m128d() {
4681	let hi = _mm_setr_pd(`3.`, `4.`);
4682	let lo = _mm_setr_pd(`1.`, `2.`);
4683	let r = _mm256_set_m128d(hi, lo);
4684	let e = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4685	assert_eq_m256d(r, e);
4686	}
4687
4688	#[simd_test(enable = "avx")]
4689	unsafe fn test_mm256_set_m128i() {
4690	#[rustfmt::skip]
4691	let hi = _mm_setr_epi8(
4692	`17`, `18`, `19`, `20`,
4693	`21`, `22`, `23`, `24`,
4694	`25`, `26`, `27`, `28`,
4695	`29`, `30`, `31`, `32`,
4696	);
4697	#[rustfmt::skip]
4698	let lo = _mm_setr_epi8(
4699	`1`, `2`, `3`, `4`,
4700	`5`, `6`, `7`, `8`,
4701	`9`, `10`, `11`, `12`,
4702	`13`, `14`, `15`, `16`,
4703	);
4704	let r = _mm256_set_m128i(hi, lo);
4705	#[rustfmt::skip]
4706	let e = _mm256_setr_epi8(
4707	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4708	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4709	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4710	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
4711	);
4712	assert_eq_m256i(r, e);
4713	}
4714
4715	#[simd_test(enable = "avx")]
4716	unsafe fn test_mm256_setr_m128() {
4717	let lo = _mm_setr_ps(`1.`, `2.`, `3.`, `4.`);
4718	let hi = _mm_setr_ps(`5.`, `6.`, `7.`, `8.`);
4719	let r = _mm256_setr_m128(lo, hi);
4720	let e = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4721	assert_eq_m256(r, e);
4722	}
4723
4724	#[simd_test(enable = "avx")]
4725	unsafe fn test_mm256_setr_m128d() {
4726	let lo = _mm_setr_pd(`1.`, `2.`);
4727	let hi = _mm_setr_pd(`3.`, `4.`);
4728	let r = _mm256_setr_m128d(lo, hi);
4729	let e = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4730	assert_eq_m256d(r, e);
4731	}
4732
4733	#[simd_test(enable = "avx")]
4734	unsafe fn test_mm256_setr_m128i() {
4735	#[rustfmt::skip]
4736	let lo = _mm_setr_epi8(
4737	`1`, `2`, `3`, `4`,
4738	`5`, `6`, `7`, `8`,
4739	`9`, `10`, `11`, `12`,
4740	`13`, `14`, `15`, `16`,
4741	);
4742	#[rustfmt::skip]
4743	let hi = _mm_setr_epi8(
4744	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4745	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
4746	);
4747	let r = _mm256_setr_m128i(lo, hi);
4748	#[rustfmt::skip]
4749	let e = _mm256_setr_epi8(
4750	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4751	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4752	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4753	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
4754	);
4755	assert_eq_m256i(r, e);
4756	}
4757
4758	#[simd_test(enable = "avx")]
4759	unsafe fn test_mm256_loadu2_m128() {
4760	let hi = &[`5.`, `6.`, `7.`, `8.`];
4761	let hiaddr = hi.as_ptr();
4762	let lo = &[`1.`, `2.`, `3.`, `4.`];
4763	let loaddr = lo.as_ptr();
4764	let r = _mm256_loadu2_m128(hiaddr, loaddr);
4765	let e = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4766	assert_eq_m256(r, e);
4767	}
4768
4769	#[simd_test(enable = "avx")]
4770	unsafe fn test_mm256_loadu2_m128d() {
4771	let hi = &[`3.`, `4.`];
4772	let hiaddr = hi.as_ptr();
4773	let lo = &[`1.`, `2.`];
4774	let loaddr = lo.as_ptr();
4775	let r = _mm256_loadu2_m128d(hiaddr, loaddr);
4776	let e = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4777	assert_eq_m256d(r, e);
4778	}
4779
4780	#[simd_test(enable = "avx")]
4781	unsafe fn test_mm256_loadu2_m128i() {
4782	#[rustfmt::skip]
4783	let hi = _mm_setr_epi8(
4784	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4785	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
4786	);
4787	#[rustfmt::skip]
4788	let lo = _mm_setr_epi8(
4789	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4790	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4791	);
4792	let r = _mm256_loadu2_m128i(&hi as *const _ as *const _, &lo as *const _ as *const _);
4793	#[rustfmt::skip]
4794	let e = _mm256_setr_epi8(
4795	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4796	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4797	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4798	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
4799	);
4800	assert_eq_m256i(r, e);
4801	}
4802
4803	#[simd_test(enable = "avx")]
4804	unsafe fn test_mm256_storeu2_m128() {
4805	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4806	let mut hi = _mm_undefined_ps();
4807	let mut lo = _mm_undefined_ps();
4808	_mm256_storeu2_m128(
4809	&mut hi as *mut _ as *mut f32,
4810	&mut lo as *mut _ as *mut f32,
4811	a,
4812	);
4813	assert_eq_m128(hi, _mm_setr_ps(`5.`, `6.`, `7.`, `8.`));
4814	assert_eq_m128(lo, _mm_setr_ps(`1.`, `2.`, `3.`, `4.`));
4815	}
4816
4817	#[simd_test(enable = "avx")]
4818	unsafe fn test_mm256_storeu2_m128d() {
4819	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4820	let mut hi = _mm_undefined_pd();
4821	let mut lo = _mm_undefined_pd();
4822	_mm256_storeu2_m128d(
4823	&mut hi as *mut _ as *mut f64,
4824	&mut lo as *mut _ as *mut f64,
4825	a,
4826	);
4827	assert_eq_m128d(hi, _mm_setr_pd(`3.`, `4.`));
4828	assert_eq_m128d(lo, _mm_setr_pd(`1.`, `2.`));
4829	}
4830
4831	#[simd_test(enable = "avx")]
4832	unsafe fn test_mm256_storeu2_m128i() {
4833	#[rustfmt::skip]
4834	let a = _mm256_setr_epi8(
4835	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4836	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4837	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4838	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
4839	);
4840	let mut hi = _mm_undefined_si128();
4841	let mut lo = _mm_undefined_si128();
4842	_mm256_storeu2_m128i(&mut hi as *mut _, &mut lo as *mut _, a);
4843	#[rustfmt::skip]
4844	let e_hi = _mm_setr_epi8(
4845	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4846	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`
4847	);
4848	#[rustfmt::skip]
4849	let e_lo = _mm_setr_epi8(
4850	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4851	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`
4852	);
4853
4854	assert_eq_m128i(hi, e_hi);
4855	assert_eq_m128i(lo, e_lo);
4856	}
4857
4858	#[simd_test(enable = "avx")]
4859	unsafe fn test_mm256_cvtss_f32() {
4860	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4861	let r = _mm256_cvtss_f32(a);
4862	assert_eq!(r, `1.`);
4863	}
4864	}
4865