avx.rs source code [crates/core_arch/src/x86/avx.rs]

1	//! Advanced Vector Extensions (AVX)
2	//!
3	//! The references are:
4	//!
//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
//!   Instruction Set Reference, A-Z][intel64_ref].
//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
//!   System Instructions][amd64_ref].
9	//!
10	//! [Wikipedia][wiki] provides a quick overview of the instructions available.
11	//!
12	//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
13	//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
14	//! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
15
16	use crate::{
17	core_arch::{simd::, x86::},
18	intrinsics::simd::*,
19	mem, ptr,
20	};
21
22	#[cfg(test)]
23	use stdarch_test::assert_instr;
24
25	/// Adds packed double-precision (64-bit) floating-point elements
26	/// in `a` and `b`.
27	///
28	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_pd)
29	#[inline]
30	#[target_feature(enable = "avx")]
31	#[cfg_attr(test, assert_instr(vaddpd))]
32	#[stable(feature = "simd_x86", since = "1.27.0")]
33	pub unsafe fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d {
34	simd_add(x:a, y:b)
35	}
36
37	/// Adds packed single-precision (32-bit) floating-point elements in `a` and
38	/// `b`.
39	///
40	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_ps)
41	#[inline]
42	#[target_feature(enable = "avx")]
43	#[cfg_attr(test, assert_instr(vaddps))]
44	#[stable(feature = "simd_x86", since = "1.27.0")]
45	pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 {
46	simd_add(x:a, y:b)
47	}
48
49	/// Computes the bitwise AND of a packed double-precision (64-bit)
50	/// floating-point elements in `a` and `b`.
51	///
52	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_pd)
53	#[inline]
54	#[target_feature(enable = "avx")]
55	// FIXME: Should be 'vandpd' instruction.
56	// See https://github.com/rust-lang/stdarch/issues/71
57	#[cfg_attr(test, assert_instr(vandps))]
58	#[stable(feature = "simd_x86", since = "1.27.0")]
59	pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d {
60	let a: u64x4 = transmute(src:a);
61	let b: u64x4 = transmute(src:b);
62	transmute(src:simd_and(x:a, y:b))
63	}
64
65	/// Computes the bitwise AND of packed single-precision (32-bit) floating-point
66	/// elements in `a` and `b`.
67	///
68	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_ps)
69	#[inline]
70	#[target_feature(enable = "avx")]
71	#[cfg_attr(test, assert_instr(vandps))]
72	#[stable(feature = "simd_x86", since = "1.27.0")]
73	pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
74	let a: u32x8 = transmute(src:a);
75	let b: u32x8 = transmute(src:b);
76	transmute(src:simd_and(x:a, y:b))
77	}
78
79	/// Computes the bitwise OR packed double-precision (64-bit) floating-point
80	/// elements in `a` and `b`.
81	///
82	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_pd)
83	#[inline]
84	#[target_feature(enable = "avx")]
85	// FIXME: should be `vorpd` instruction.
86	// See <https://github.com/rust-lang/stdarch/issues/71>.
87	#[cfg_attr(test, assert_instr(vorps))]
88	#[stable(feature = "simd_x86", since = "1.27.0")]
89	pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d {
90	let a: u64x4 = transmute(src:a);
91	let b: u64x4 = transmute(src:b);
92	transmute(src:simd_or(x:a, y:b))
93	}
94
95	/// Computes the bitwise OR packed single-precision (32-bit) floating-point
96	/// elements in `a` and `b`.
97	///
98	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_ps)
99	#[inline]
100	#[target_feature(enable = "avx")]
101	#[cfg_attr(test, assert_instr(vorps))]
102	#[stable(feature = "simd_x86", since = "1.27.0")]
103	pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
104	let a: u32x8 = transmute(src:a);
105	let b: u32x8 = transmute(src:b);
106	transmute(src:simd_or(x:a, y:b))
107	}
108
109	/// Shuffles double-precision (64-bit) floating-point elements within 128-bit
110	/// lanes using the control in `imm8`.
111	///
112	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_pd)
113	#[inline]
114	#[target_feature(enable = "avx")]
115	#[cfg_attr(test, assert_instr(vshufpd, MASK = `3`))]
116	#[rustc_legacy_const_generics(`2`)]
117	#[stable(feature = "simd_x86", since = "1.27.0")]
118	pub unsafe fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
119	static_assert_uimm_bits!(MASK, `8`);
120	simd_shuffle!(
121	a,
122	b,
123	[
124	MASK as u32 & `0b1`,
125	((MASK as u32 >> `1`) & `0b1`) + `4`,
126	((MASK as u32 >> `2`) & `0b1`) + `2`,
127	((MASK as u32 >> `3`) & `0b1`) + `6`,
128	],
129	)
130	}
131
132	/// Shuffles single-precision (32-bit) floating-point elements in `a` within
133	/// 128-bit lanes using the control in `imm8`.
134	///
135	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_ps)
136	#[inline]
137	#[target_feature(enable = "avx")]
138	#[cfg_attr(test, assert_instr(vshufps, MASK = `3`))]
139	#[rustc_legacy_const_generics(`2`)]
140	#[stable(feature = "simd_x86", since = "1.27.0")]
141	pub unsafe fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
142	static_assert_uimm_bits!(MASK, `8`);
143	simd_shuffle!(
144	a,
145	b,
146	[
147	MASK as u32 & `0b11`,
148	(MASK as u32 >> `2`) & `0b11`,
149	((MASK as u32 >> `4`) & `0b11`) + `8`,
150	((MASK as u32 >> `6`) & `0b11`) + `8`,
151	(MASK as u32 & `0b11`) + `4`,
152	((MASK as u32 >> `2`) & `0b11`) + `4`,
153	((MASK as u32 >> `4`) & `0b11`) + `12`,
154	((MASK as u32 >> `6`) & `0b11`) + `12`,
155	],
156	)
157	}
158
159	/// Computes the bitwise NOT of packed double-precision (64-bit) floating-point
160	/// elements in `a`, and then AND with `b`.
161	///
162	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_pd)
163	#[inline]
164	#[target_feature(enable = "avx")]
165	// FIXME: should be `vandnpd` instruction.
166	#[cfg_attr(test, assert_instr(vandnps))]
167	#[stable(feature = "simd_x86", since = "1.27.0")]
168	pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d {
169	let a: u64x4 = transmute(src:a);
170	let b: u64x4 = transmute(src:b);
171	transmute(src:simd_and(x:simd_xor(u64x4::splat(!(`0_u64`)), a), y:b))
172	}
173
174	/// Computes the bitwise NOT of packed single-precision (32-bit) floating-point
175	/// elements in `a`
176	/// and then AND with `b`.
177	///
178	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_ps)
179	#[inline]
180	#[target_feature(enable = "avx")]
181	#[cfg_attr(test, assert_instr(vandnps))]
182	#[stable(feature = "simd_x86", since = "1.27.0")]
183	pub unsafe fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 {
184	let a: u32x8 = transmute(src:a);
185	let b: u32x8 = transmute(src:b);
186	transmute(src:simd_and(x:simd_xor(u32x8::splat(!(`0_u32`)), a), y:b))
187	}
188
189	/// Compares packed double-precision (64-bit) floating-point elements
190	/// in `a` and `b`, and returns packed maximum values
191	///
192	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_pd)
193	#[inline]
194	#[target_feature(enable = "avx")]
195	#[cfg_attr(test, assert_instr(vmaxpd))]
196	#[stable(feature = "simd_x86", since = "1.27.0")]
197	pub unsafe fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d {
198	vmaxpd(a, b)
199	}
200
201	/// Compares packed single-precision (32-bit) floating-point elements in `a`
202	/// and `b`, and returns packed maximum values
203	///
204	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_ps)
205	#[inline]
206	#[target_feature(enable = "avx")]
207	#[cfg_attr(test, assert_instr(vmaxps))]
208	#[stable(feature = "simd_x86", since = "1.27.0")]
209	pub unsafe fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 {
210	vmaxps(a, b)
211	}
212
213	/// Compares packed double-precision (64-bit) floating-point elements
214	/// in `a` and `b`, and returns packed minimum values
215	///
216	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_pd)
217	#[inline]
218	#[target_feature(enable = "avx")]
219	#[cfg_attr(test, assert_instr(vminpd))]
220	#[stable(feature = "simd_x86", since = "1.27.0")]
221	pub unsafe fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d {
222	vminpd(a, b)
223	}
224
225	/// Compares packed single-precision (32-bit) floating-point elements in `a`
226	/// and `b`, and returns packed minimum values
227	///
228	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_ps)
229	#[inline]
230	#[target_feature(enable = "avx")]
231	#[cfg_attr(test, assert_instr(vminps))]
232	#[stable(feature = "simd_x86", since = "1.27.0")]
233	pub unsafe fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 {
234	vminps(a, b)
235	}
236
237	/// Multiplies packed double-precision (64-bit) floating-point elements
238	/// in `a` and `b`.
239	///
240	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_pd)
241	#[inline]
242	#[target_feature(enable = "avx")]
243	#[cfg_attr(test, assert_instr(vmulpd))]
244	#[stable(feature = "simd_x86", since = "1.27.0")]
245	pub unsafe fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d {
246	simd_mul(x:a, y:b)
247	}
248
249	/// Multiplies packed single-precision (32-bit) floating-point elements in `a` and
250	/// `b`.
251	///
252	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_ps)
253	#[inline]
254	#[target_feature(enable = "avx")]
255	#[cfg_attr(test, assert_instr(vmulps))]
256	#[stable(feature = "simd_x86", since = "1.27.0")]
257	pub unsafe fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 {
258	simd_mul(x:a, y:b)
259	}
260
261	/// Alternatively adds and subtracts packed double-precision (64-bit)
262	/// floating-point elements in `a` to/from packed elements in `b`.
263	///
264	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_addsub_pd)
265	#[inline]
266	#[target_feature(enable = "avx")]
267	#[cfg_attr(test, assert_instr(vaddsubpd))]
268	#[stable(feature = "simd_x86", since = "1.27.0")]
269	pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d {
270	let a: f64x4 = a.as_f64x4();
271	let b: f64x4 = b.as_f64x4();
272	let add: f64x4 = simd_add(x:a, y:b);
273	let sub: f64x4 = simd_sub(lhs:a, rhs:b);
274	simd_shuffle!(add, sub, [`4`, `1`, `6`, `3`])
275	}
276
277	/// Alternatively adds and subtracts packed single-precision (32-bit)
278	/// floating-point elements in `a` to/from packed elements in `b`.
279	///
280	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_addsub_ps)
281	#[inline]
282	#[target_feature(enable = "avx")]
283	#[cfg_attr(test, assert_instr(vaddsubps))]
284	#[stable(feature = "simd_x86", since = "1.27.0")]
285	pub unsafe fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 {
286	let a: f32x8 = a.as_f32x8();
287	let b: f32x8 = b.as_f32x8();
288	let add: f32x8 = simd_add(x:a, y:b);
289	let sub: f32x8 = simd_sub(lhs:a, rhs:b);
290	simd_shuffle!(add, sub, [`8`, `1`, `10`, `3`, `12`, `5`, `14`, `7`])
291	}
292
293	/// Subtracts packed double-precision (64-bit) floating-point elements in `b`
294	/// from packed elements in `a`.
295	///
296	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_pd)
297	#[inline]
298	#[target_feature(enable = "avx")]
299	#[cfg_attr(test, assert_instr(vsubpd))]
300	#[stable(feature = "simd_x86", since = "1.27.0")]
301	pub unsafe fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d {
302	simd_sub(lhs:a, rhs:b)
303	}
304
305	/// Subtracts packed single-precision (32-bit) floating-point elements in `b`
306	/// from packed elements in `a`.
307	///
308	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_ps)
309	#[inline]
310	#[target_feature(enable = "avx")]
311	#[cfg_attr(test, assert_instr(vsubps))]
312	#[stable(feature = "simd_x86", since = "1.27.0")]
313	pub unsafe fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 {
314	simd_sub(lhs:a, rhs:b)
315	}
316
317	/// Computes the division of each of the 8 packed 32-bit floating-point elements
318	/// in `a` by the corresponding packed elements in `b`.
319	///
320	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_div_ps)
321	#[inline]
322	#[target_feature(enable = "avx")]
323	#[cfg_attr(test, assert_instr(vdivps))]
324	#[stable(feature = "simd_x86", since = "1.27.0")]
325	pub unsafe fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 {
326	simd_div(lhs:a, rhs:b)
327	}
328
329	/// Computes the division of each of the 4 packed 64-bit floating-point elements
330	/// in `a` by the corresponding packed elements in `b`.
331	///
332	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_div_pd)
333	#[inline]
334	#[target_feature(enable = "avx")]
335	#[cfg_attr(test, assert_instr(vdivpd))]
336	#[stable(feature = "simd_x86", since = "1.27.0")]
337	pub unsafe fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d {
338	simd_div(lhs:a, rhs:b)
339	}
340
341	/// Rounds packed double-precision (64-bit) floating point elements in `a`
342	/// according to the flag `ROUNDING`. The value of `ROUNDING` may be as follows:
343	///
344	/// - `0x00`: Round to the nearest whole number.
345	/// - `0x01`: Round down, toward negative infinity.
346	/// - `0x02`: Round up, toward positive infinity.
347	/// - `0x03`: Truncate the values.
348	///
349	/// For a complete list of options, check [the LLVM docs][llvm_docs].
350	///
351	/// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
352	///
353	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_round_pd)
354	#[inline]
355	#[target_feature(enable = "avx")]
356	#[cfg_attr(test, assert_instr(vroundpd, ROUNDING = `0x3`))]
357	#[rustc_legacy_const_generics(`1`)]
358	#[stable(feature = "simd_x86", since = "1.27.0")]
359	pub unsafe fn _mm256_round_pd<const ROUNDING: i32>(a: __m256d) -> __m256d {
360	static_assert_uimm_bits!(ROUNDING, `4`);
361	roundpd256(a, ROUNDING)
362	}
363
364	/// Rounds packed double-precision (64-bit) floating point elements in `a`
365	/// toward positive infinity.
366	///
367	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ceil_pd)
368	#[inline]
369	#[target_feature(enable = "avx")]
370	#[cfg_attr(test, assert_instr(vroundpd))]
371	#[stable(feature = "simd_x86", since = "1.27.0")]
372	pub unsafe fn _mm256_ceil_pd(a: __m256d) -> __m256d {
373	simd_ceil(a)
374	}
375
376	/// Rounds packed double-precision (64-bit) floating point elements in `a`
377	/// toward negative infinity.
378	///
379	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_floor_pd)
380	#[inline]
381	#[target_feature(enable = "avx")]
382	#[cfg_attr(test, assert_instr(vroundpd))]
383	#[stable(feature = "simd_x86", since = "1.27.0")]
384	pub unsafe fn _mm256_floor_pd(a: __m256d) -> __m256d {
385	simd_floor(a)
386	}
387
388	/// Rounds packed single-precision (32-bit) floating point elements in `a`
389	/// according to the flag `ROUNDING`. The value of `ROUNDING` may be as follows:
390	///
391	/// - `0x00`: Round to the nearest whole number.
392	/// - `0x01`: Round down, toward negative infinity.
393	/// - `0x02`: Round up, toward positive infinity.
394	/// - `0x03`: Truncate the values.
395	///
396	/// For a complete list of options, check [the LLVM docs][llvm_docs].
397	///
398	/// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
399	///
400	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_round_ps)
401	#[inline]
402	#[target_feature(enable = "avx")]
403	#[cfg_attr(test, assert_instr(vroundps, ROUNDING = `0x00`))]
404	#[rustc_legacy_const_generics(`1`)]
405	#[stable(feature = "simd_x86", since = "1.27.0")]
406	pub unsafe fn _mm256_round_ps<const ROUNDING: i32>(a: __m256) -> __m256 {
407	static_assert_uimm_bits!(ROUNDING, `4`);
408	roundps256(a, ROUNDING)
409	}
410
411	/// Rounds packed single-precision (32-bit) floating point elements in `a`
412	/// toward positive infinity.
413	///
414	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ceil_ps)
415	#[inline]
416	#[target_feature(enable = "avx")]
417	#[cfg_attr(test, assert_instr(vroundps))]
418	#[stable(feature = "simd_x86", since = "1.27.0")]
419	pub unsafe fn _mm256_ceil_ps(a: __m256) -> __m256 {
420	simd_ceil(a)
421	}
422
423	/// Rounds packed single-precision (32-bit) floating point elements in `a`
424	/// toward negative infinity.
425	///
426	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_floor_ps)
427	#[inline]
428	#[target_feature(enable = "avx")]
429	#[cfg_attr(test, assert_instr(vroundps))]
430	#[stable(feature = "simd_x86", since = "1.27.0")]
431	pub unsafe fn _mm256_floor_ps(a: __m256) -> __m256 {
432	simd_floor(a)
433	}
434
435	/// Returns the square root of packed single-precision (32-bit) floating point
436	/// elements in `a`.
437	///
438	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sqrt_ps)
439	#[inline]
440	#[target_feature(enable = "avx")]
441	#[cfg_attr(test, assert_instr(vsqrtps))]
442	#[stable(feature = "simd_x86", since = "1.27.0")]
443	pub unsafe fn _mm256_sqrt_ps(a: __m256) -> __m256 {
444	sqrtps256(a)
445	}
446
447	/// Returns the square root of packed double-precision (64-bit) floating point
448	/// elements in `a`.
449	///
450	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sqrt_pd)
451	#[inline]
452	#[target_feature(enable = "avx")]
453	#[cfg_attr(test, assert_instr(vsqrtpd))]
454	#[stable(feature = "simd_x86", since = "1.27.0")]
455	pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
456	simd_fsqrt(a)
457	}
458
459	/// Blends packed double-precision (64-bit) floating-point elements from
460	/// `a` and `b` using control mask `imm8`.
461	///
462	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_pd)
463	#[inline]
464	#[target_feature(enable = "avx")]
465	// Note: LLVM7 prefers single-precision blend instructions when
466	// possible, see: https://bugs.llvm.org/show_bug.cgi?id=38194
467	// #[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))]
468	#[cfg_attr(test, assert_instr(vblendps, IMM4 = `9`))]
469	#[rustc_legacy_const_generics(`2`)]
470	#[stable(feature = "simd_x86", since = "1.27.0")]
471	pub unsafe fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256d {
472	static_assert_uimm_bits!(IMM4, `4`);
473	simd_shuffle!(
474	a,
475	b,
476	[
477	((IMM4 as u32 >> `0`) & `1`) * `4` + `0`,
478	((IMM4 as u32 >> `1`) & `1`) * `4` + `1`,
479	((IMM4 as u32 >> `2`) & `1`) * `4` + `2`,
480	((IMM4 as u32 >> `3`) & `1`) * `4` + `3`,
481	],
482	)
483	}
484
485	/// Blends packed single-precision (32-bit) floating-point elements from
486	/// `a` and `b` using control mask `imm8`.
487	///
488	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_ps)
489	#[inline]
490	#[target_feature(enable = "avx")]
491	#[cfg_attr(test, assert_instr(vblendps, IMM8 = `9`))]
492	#[rustc_legacy_const_generics(`2`)]
493	#[stable(feature = "simd_x86", since = "1.27.0")]
494	pub unsafe fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
495	static_assert_uimm_bits!(IMM8, `8`);
496	simd_shuffle!(
497	a,
498	b,
499	[
500	((IMM8 as u32 >> `0`) & `1`) * `8` + `0`,
501	((IMM8 as u32 >> `1`) & `1`) * `8` + `1`,
502	((IMM8 as u32 >> `2`) & `1`) * `8` + `2`,
503	((IMM8 as u32 >> `3`) & `1`) * `8` + `3`,
504	((IMM8 as u32 >> `4`) & `1`) * `8` + `4`,
505	((IMM8 as u32 >> `5`) & `1`) * `8` + `5`,
506	((IMM8 as u32 >> `6`) & `1`) * `8` + `6`,
507	((IMM8 as u32 >> `7`) & `1`) * `8` + `7`,
508	],
509	)
510	}
511
512	/// Blends packed double-precision (64-bit) floating-point elements from
513	/// `a` and `b` using `c` as a mask.
514	///
515	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_pd)
516	#[inline]
517	#[target_feature(enable = "avx")]
518	#[cfg_attr(test, assert_instr(vblendvpd))]
519	#[stable(feature = "simd_x86", since = "1.27.0")]
520	pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
521	let mask: i64x4 = simd_lt(x:transmute::<_, i64x4>(c), y:i64x4::splat(`0`));
522	transmute(src:simd_select(mask, if_true:b.as_f64x4(), if_false:a.as_f64x4()))
523	}
524
525	/// Blends packed single-precision (32-bit) floating-point elements from
526	/// `a` and `b` using `c` as a mask.
527	///
528	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_ps)
529	#[inline]
530	#[target_feature(enable = "avx")]
531	#[cfg_attr(test, assert_instr(vblendvps))]
532	#[stable(feature = "simd_x86", since = "1.27.0")]
533	pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
534	let mask: i32x8 = simd_lt(x:transmute::<_, i32x8>(c), y:i32x8::splat(`0`));
535	transmute(src:simd_select(mask, if_true:b.as_f32x8(), if_false:a.as_f32x8()))
536	}
537
538	/// Conditionally multiplies the packed single-precision (32-bit) floating-point
539	/// elements in `a` and `b` using the high 4 bits in `imm8`,
540	/// sum the four products, and conditionally return the sum
541	/// using the low 4 bits of `imm8`.
542	///
543	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dp_ps)
544	#[inline]
545	#[target_feature(enable = "avx")]
546	#[cfg_attr(test, assert_instr(vdpps, IMM8 = `0x0`))]
547	#[rustc_legacy_const_generics(`2`)]
548	#[stable(feature = "simd_x86", since = "1.27.0")]
549	pub unsafe fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
550	static_assert_uimm_bits!(IMM8, `8`);
551	vdpps(a, b, IMM8)
552	}
553
554	/// Horizontal addition of adjacent pairs in the two packed vectors
555	/// of 4 64-bit floating points `a` and `b`.
556	/// In the result, sums of elements from `a` are returned in even locations,
557	/// while sums of elements from `b` are returned in odd locations.
558	///
559	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_pd)
560	#[inline]
561	#[target_feature(enable = "avx")]
562	#[cfg_attr(test, assert_instr(vhaddpd))]
563	#[stable(feature = "simd_x86", since = "1.27.0")]
564	pub unsafe fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d {
565	vhaddpd(a, b)
566	}
567
568	/// Horizontal addition of adjacent pairs in the two packed vectors
569	/// of 8 32-bit floating points `a` and `b`.
570	/// In the result, sums of elements from `a` are returned in locations of
571	/// indices 0, 1, 4, 5; while sums of elements from `b` are locations
572	/// 2, 3, 6, 7.
573	///
574	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_ps)
575	#[inline]
576	#[target_feature(enable = "avx")]
577	#[cfg_attr(test, assert_instr(vhaddps))]
578	#[stable(feature = "simd_x86", since = "1.27.0")]
579	pub unsafe fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 {
580	vhaddps(a, b)
581	}
582
583	/// Horizontal subtraction of adjacent pairs in the two packed vectors
584	/// of 4 64-bit floating points `a` and `b`.
585	/// In the result, sums of elements from `a` are returned in even locations,
586	/// while sums of elements from `b` are returned in odd locations.
587	///
588	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_pd)
589	#[inline]
590	#[target_feature(enable = "avx")]
591	#[cfg_attr(test, assert_instr(vhsubpd))]
592	#[stable(feature = "simd_x86", since = "1.27.0")]
593	pub unsafe fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d {
594	vhsubpd(a, b)
595	}
596
597	/// Horizontal subtraction of adjacent pairs in the two packed vectors
598	/// of 8 32-bit floating points `a` and `b`.
599	/// In the result, sums of elements from `a` are returned in locations of
600	/// indices 0, 1, 4, 5; while sums of elements from `b` are locations
601	/// 2, 3, 6, 7.
602	///
603	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_ps)
604	#[inline]
605	#[target_feature(enable = "avx")]
606	#[cfg_attr(test, assert_instr(vhsubps))]
607	#[stable(feature = "simd_x86", since = "1.27.0")]
608	pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
609	vhsubps(a, b)
610	}
611
612	/// Computes the bitwise XOR of packed double-precision (64-bit) floating-point
613	/// elements in `a` and `b`.
614	///
615	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_pd)
616	#[inline]
617	#[target_feature(enable = "avx")]
618	// FIXME Should be 'vxorpd' instruction.
619	#[cfg_attr(test, assert_instr(vxorps))]
620	#[stable(feature = "simd_x86", since = "1.27.0")]
621	pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d {
622	let a: u64x4 = transmute(src:a);
623	let b: u64x4 = transmute(src:b);
624	transmute(src:simd_xor(x:a, y:b))
625	}
626
627	/// Computes the bitwise XOR of packed single-precision (32-bit) floating-point
628	/// elements in `a` and `b`.
629	///
630	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_ps)
631	#[inline]
632	#[target_feature(enable = "avx")]
633	#[cfg_attr(test, assert_instr(vxorps))]
634	#[stable(feature = "simd_x86", since = "1.27.0")]
635	pub unsafe fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 {
636	let a: u32x8 = transmute(src:a);
637	let b: u32x8 = transmute(src:b);
638	transmute(src:simd_xor(x:a, y:b))
639	}
640
641	/// Equal (ordered, non-signaling)
642	#[stable(feature = "simd_x86", since = "1.27.0")]
643	pub const _CMP_EQ_OQ: i32 = `0x00`;
644	/// Less-than (ordered, signaling)
645	#[stable(feature = "simd_x86", since = "1.27.0")]
646	pub const _CMP_LT_OS: i32 = `0x01`;
647	/// Less-than-or-equal (ordered, signaling)
648	#[stable(feature = "simd_x86", since = "1.27.0")]
649	pub const _CMP_LE_OS: i32 = `0x02`;
650	/// Unordered (non-signaling)
651	#[stable(feature = "simd_x86", since = "1.27.0")]
652	pub const _CMP_UNORD_Q: i32 = `0x03`;
653	/// Not-equal (unordered, non-signaling)
654	#[stable(feature = "simd_x86", since = "1.27.0")]
655	pub const _CMP_NEQ_UQ: i32 = `0x04`;
656	/// Not-less-than (unordered, signaling)
657	#[stable(feature = "simd_x86", since = "1.27.0")]
658	pub const _CMP_NLT_US: i32 = `0x05`;
659	/// Not-less-than-or-equal (unordered, signaling)
660	#[stable(feature = "simd_x86", since = "1.27.0")]
661	pub const _CMP_NLE_US: i32 = `0x06`;
662	/// Ordered (non-signaling)
663	#[stable(feature = "simd_x86", since = "1.27.0")]
664	pub const _CMP_ORD_Q: i32 = `0x07`;
665	/// Equal (unordered, non-signaling)
666	#[stable(feature = "simd_x86", since = "1.27.0")]
667	pub const _CMP_EQ_UQ: i32 = `0x08`;
668	/// Not-greater-than-or-equal (unordered, signaling)
669	#[stable(feature = "simd_x86", since = "1.27.0")]
670	pub const _CMP_NGE_US: i32 = `0x09`;
671	/// Not-greater-than (unordered, signaling)
672	#[stable(feature = "simd_x86", since = "1.27.0")]
673	pub const _CMP_NGT_US: i32 = `0x0a`;
674	/// False (ordered, non-signaling)
675	#[stable(feature = "simd_x86", since = "1.27.0")]
676	pub const _CMP_FALSE_OQ: i32 = `0x0b`;
677	/// Not-equal (ordered, non-signaling)
678	#[stable(feature = "simd_x86", since = "1.27.0")]
679	pub const _CMP_NEQ_OQ: i32 = `0x0c`;
680	/// Greater-than-or-equal (ordered, signaling)
681	#[stable(feature = "simd_x86", since = "1.27.0")]
682	pub const _CMP_GE_OS: i32 = `0x0d`;
683	/// Greater-than (ordered, signaling)
684	#[stable(feature = "simd_x86", since = "1.27.0")]
685	pub const _CMP_GT_OS: i32 = `0x0e`;
686	/// True (unordered, non-signaling)
687	#[stable(feature = "simd_x86", since = "1.27.0")]
688	pub const _CMP_TRUE_UQ: i32 = `0x0f`;
689	/// Equal (ordered, signaling)
690	#[stable(feature = "simd_x86", since = "1.27.0")]
691	pub const _CMP_EQ_OS: i32 = `0x10`;
692	/// Less-than (ordered, non-signaling)
693	#[stable(feature = "simd_x86", since = "1.27.0")]
694	pub const _CMP_LT_OQ: i32 = `0x11`;
695	/// Less-than-or-equal (ordered, non-signaling)
696	#[stable(feature = "simd_x86", since = "1.27.0")]
697	pub const _CMP_LE_OQ: i32 = `0x12`;
698	/// Unordered (signaling)
699	#[stable(feature = "simd_x86", since = "1.27.0")]
700	pub const _CMP_UNORD_S: i32 = `0x13`;
701	/// Not-equal (unordered, signaling)
702	#[stable(feature = "simd_x86", since = "1.27.0")]
703	pub const _CMP_NEQ_US: i32 = `0x14`;
704	/// Not-less-than (unordered, non-signaling)
705	#[stable(feature = "simd_x86", since = "1.27.0")]
706	pub const _CMP_NLT_UQ: i32 = `0x15`;
707	/// Not-less-than-or-equal (unordered, non-signaling)
708	#[stable(feature = "simd_x86", since = "1.27.0")]
709	pub const _CMP_NLE_UQ: i32 = `0x16`;
710	/// Ordered (signaling)
711	#[stable(feature = "simd_x86", since = "1.27.0")]
712	pub const _CMP_ORD_S: i32 = `0x17`;
713	/// Equal (unordered, signaling)
714	#[stable(feature = "simd_x86", since = "1.27.0")]
715	pub const _CMP_EQ_US: i32 = `0x18`;
716	/// Not-greater-than-or-equal (unordered, non-signaling)
717	#[stable(feature = "simd_x86", since = "1.27.0")]
718	pub const _CMP_NGE_UQ: i32 = `0x19`;
719	/// Not-greater-than (unordered, non-signaling)
720	#[stable(feature = "simd_x86", since = "1.27.0")]
721	pub const _CMP_NGT_UQ: i32 = `0x1a`;
722	/// False (ordered, signaling)
723	#[stable(feature = "simd_x86", since = "1.27.0")]
724	pub const _CMP_FALSE_OS: i32 = `0x1b`;
725	/// Not-equal (ordered, signaling)
726	#[stable(feature = "simd_x86", since = "1.27.0")]
727	pub const _CMP_NEQ_OS: i32 = `0x1c`;
728	/// Greater-than-or-equal (ordered, non-signaling)
729	#[stable(feature = "simd_x86", since = "1.27.0")]
730	pub const _CMP_GE_OQ: i32 = `0x1d`;
731	/// Greater-than (ordered, non-signaling)
732	#[stable(feature = "simd_x86", since = "1.27.0")]
733	pub const _CMP_GT_OQ: i32 = `0x1e`;
734	/// True (unordered, signaling)
735	#[stable(feature = "simd_x86", since = "1.27.0")]
736	pub const _CMP_TRUE_US: i32 = `0x1f`;
737
738	/// Compares packed double-precision (64-bit) floating-point
739	/// elements in `a` and `b` based on the comparison operand
740	/// specified by `IMM5`.
741	///
742	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd)
743	#[inline]
744	#[target_feature(enable = "avx,sse2")]
745	#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = `0`))] // TODO Validate vcmppd
746	#[rustc_legacy_const_generics(`2`)]
747	#[stable(feature = "simd_x86", since = "1.27.0")]
748	pub unsafe fn _mm_cmp_pd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
749	static_assert_uimm_bits!(IMM5, `5`);
750	vcmppd(a, b, imm8:const { IMM5 as i8 })
751	}
752
753	/// Compares packed double-precision (64-bit) floating-point
754	/// elements in `a` and `b` based on the comparison operand
755	/// specified by `IMM5`.
756	///
757	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd)
758	#[inline]
759	#[target_feature(enable = "avx")]
760	#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = `0`))] // TODO Validate vcmppd
761	#[rustc_legacy_const_generics(`2`)]
762	#[stable(feature = "simd_x86", since = "1.27.0")]
763	pub unsafe fn _mm256_cmp_pd<const IMM5: i32>(a: __m256d, b: __m256d) -> __m256d {
764	static_assert_uimm_bits!(IMM5, `5`);
765	vcmppd256(a, b, IMM5 as u8)
766	}
767
768	/// Compares packed single-precision (32-bit) floating-point
769	/// elements in `a` and `b` based on the comparison operand
770	/// specified by `IMM5`.
771	///
772	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps)
773	#[inline]
774	#[target_feature(enable = "avx,sse")]
775	#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = `0`))] // TODO Validate vcmpps
776	#[rustc_legacy_const_generics(`2`)]
777	#[stable(feature = "simd_x86", since = "1.27.0")]
778	pub unsafe fn _mm_cmp_ps<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
779	static_assert_uimm_bits!(IMM5, `5`);
780	vcmpps(a, b, imm8:const { IMM5 as i8 })
781	}
782
783	/// Compares packed single-precision (32-bit) floating-point
784	/// elements in `a` and `b` based on the comparison operand
785	/// specified by `IMM5`.
786	///
787	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps)
788	#[inline]
789	#[target_feature(enable = "avx")]
790	#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = `0`))] // TODO Validate vcmpps
791	#[rustc_legacy_const_generics(`2`)]
792	#[stable(feature = "simd_x86", since = "1.27.0")]
793	pub unsafe fn _mm256_cmp_ps<const IMM5: i32>(a: __m256, b: __m256) -> __m256 {
794	static_assert_uimm_bits!(IMM5, `5`);
795	vcmpps256(a, b, imm8:const { IMM5 as u8 })
796	}
797
798	/// Compares the lower double-precision (64-bit) floating-point element in
799	/// `a` and `b` based on the comparison operand specified by `IMM5`,
800	/// store the result in the lower element of returned vector,
801	/// and copies the upper element from `a` to the upper element of returned
802	/// vector.
803	///
804	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd)
805	#[inline]
806	#[target_feature(enable = "avx,sse2")]
807	#[cfg_attr(test, assert_instr(vcmpeqsd, IMM5 = `0`))] // TODO Validate vcmpsd
808	#[rustc_legacy_const_generics(`2`)]
809	#[stable(feature = "simd_x86", since = "1.27.0")]
810	pub unsafe fn _mm_cmp_sd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
811	static_assert_uimm_bits!(IMM5, `5`);
812	vcmpsd(a, b, IMM5 as i8)
813	}
814
815	/// Compares the lower single-precision (32-bit) floating-point element in
816	/// `a` and `b` based on the comparison operand specified by `IMM5`,
817	/// store the result in the lower element of returned vector,
818	/// and copies the upper 3 packed elements from `a` to the upper elements of
819	/// returned vector.
820	///
821	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss)
822	#[inline]
823	#[target_feature(enable = "avx,sse")]
824	#[cfg_attr(test, assert_instr(vcmpeqss, IMM5 = `0`))] // TODO Validate vcmpss
825	#[rustc_legacy_const_generics(`2`)]
826	#[stable(feature = "simd_x86", since = "1.27.0")]
827	pub unsafe fn _mm_cmp_ss<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
828	static_assert_uimm_bits!(IMM5, `5`);
829	vcmpss(a, b, IMM5 as i8)
830	}
831
832	/// Converts packed 32-bit integers in `a` to packed double-precision (64-bit)
833	/// floating-point elements.
834	///
835	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_pd)
836	#[inline]
837	#[target_feature(enable = "avx")]
838	#[cfg_attr(test, assert_instr(vcvtdq2pd))]
839	#[stable(feature = "simd_x86", since = "1.27.0")]
840	pub unsafe fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d {
841	simd_cast(a.as_i32x4())
842	}
843
844	/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
845	/// floating-point elements.
846	///
847	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_ps)
848	#[inline]
849	#[target_feature(enable = "avx")]
850	#[cfg_attr(test, assert_instr(vcvtdq2ps))]
851	#[stable(feature = "simd_x86", since = "1.27.0")]
852	pub unsafe fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 {
853	simd_cast(a.as_i32x8())
854	}
855
856	/// Converts packed double-precision (64-bit) floating-point elements in `a`
857	/// to packed single-precision (32-bit) floating-point elements.
858	///
859	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_ps)
860	#[inline]
861	#[target_feature(enable = "avx")]
862	#[cfg_attr(test, assert_instr(vcvtpd2ps))]
863	#[stable(feature = "simd_x86", since = "1.27.0")]
864	pub unsafe fn _mm256_cvtpd_ps(a: __m256d) -> __m128 {
865	simd_cast(a)
866	}
867
868	/// Converts packed single-precision (32-bit) floating-point elements in `a`
869	/// to packed 32-bit integers.
870	///
871	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epi32)
872	#[inline]
873	#[target_feature(enable = "avx")]
874	#[cfg_attr(test, assert_instr(vcvtps2dq))]
875	#[stable(feature = "simd_x86", since = "1.27.0")]
876	pub unsafe fn _mm256_cvtps_epi32(a: __m256) -> __m256i {
877	transmute(src:vcvtps2dq(a))
878	}
879
880	/// Converts packed single-precision (32-bit) floating-point elements in `a`
881	/// to packed double-precision (64-bit) floating-point elements.
882	///
883	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_pd)
884	#[inline]
885	#[target_feature(enable = "avx")]
886	#[cfg_attr(test, assert_instr(vcvtps2pd))]
887	#[stable(feature = "simd_x86", since = "1.27.0")]
888	pub unsafe fn _mm256_cvtps_pd(a: __m128) -> __m256d {
889	simd_cast(a)
890	}
891
892	/// Converts packed double-precision (64-bit) floating-point elements in `a`
893	/// to packed 32-bit integers with truncation.
894	///
895	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epi32)
896	#[inline]
897	#[target_feature(enable = "avx")]
898	#[cfg_attr(test, assert_instr(vcvttpd2dq))]
899	#[stable(feature = "simd_x86", since = "1.27.0")]
900	pub unsafe fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i {
901	transmute(src:vcvttpd2dq(a))
902	}
903
904	/// Converts packed double-precision (64-bit) floating-point elements in `a`
905	/// to packed 32-bit integers.
906	///
907	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epi32)
908	#[inline]
909	#[target_feature(enable = "avx")]
910	#[cfg_attr(test, assert_instr(vcvtpd2dq))]
911	#[stable(feature = "simd_x86", since = "1.27.0")]
912	pub unsafe fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i {
913	transmute(src:vcvtpd2dq(a))
914	}
915
916	/// Converts packed single-precision (32-bit) floating-point elements in `a`
917	/// to packed 32-bit integers with truncation.
918	///
919	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epi32)
920	#[inline]
921	#[target_feature(enable = "avx")]
922	#[cfg_attr(test, assert_instr(vcvttps2dq))]
923	#[stable(feature = "simd_x86", since = "1.27.0")]
924	pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i {
925	transmute(src:vcvttps2dq(a))
926	}
927
928	/// Extracts 128 bits (composed of 4 packed single-precision (32-bit)
929	/// floating-point elements) from `a`, selected with `imm8`.
930	///
931	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf128_ps)
932	#[inline]
933	#[target_feature(enable = "avx")]
934	#[cfg_attr(
935	all(test, not(target_os = "windows")),
936	assert_instr(vextractf128, IMM1 = `1`)
937	)]
938	#[rustc_legacy_const_generics(`1`)]
939	#[stable(feature = "simd_x86", since = "1.27.0")]
940	pub unsafe fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128 {
941	static_assert_uimm_bits!(IMM1, `1`);
942	simd_shuffle!(
943	a,
944	_mm256_undefined_ps(),
945	[[`0`, `1`, `2`, `3`], [`4`, `5`, `6`, `7`]][IMM1 as usize],
946	)
947	}
948
949	/// Extracts 128 bits (composed of 2 packed double-precision (64-bit)
950	/// floating-point elements) from `a`, selected with `imm8`.
951	///
952	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf128_pd)
953	#[inline]
954	#[target_feature(enable = "avx")]
955	#[cfg_attr(
956	all(test, not(target_os = "windows")),
957	assert_instr(vextractf128, IMM1 = `1`)
958	)]
959	#[rustc_legacy_const_generics(`1`)]
960	#[stable(feature = "simd_x86", since = "1.27.0")]
961	pub unsafe fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
962	static_assert_uimm_bits!(IMM1, `1`);
963	simd_shuffle!(a, _mm256_undefined_pd(), [[`0`, `1`], [`2`, `3`]][IMM1 as usize])
964	}
965
966	/// Extracts 128 bits (composed of integer data) from `a`, selected with `imm8`.
967	///
968	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf128_si256)
969	#[inline]
970	#[target_feature(enable = "avx")]
971	#[cfg_attr(
972	all(test, not(target_os = "windows")),
973	assert_instr(vextractf128, IMM1 = `1`)
974	)]
975	#[rustc_legacy_const_generics(`1`)]
976	#[stable(feature = "simd_x86", since = "1.27.0")]
977	pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
978	static_assert_uimm_bits!(IMM1, `1`);
979	let dst: i64x2 = simd_shuffle!(
980	a.as_i64x4(),
981	_mm256_undefined_si256().as_i64x4(),
982	[[`0`, `1`], [`2`, `3`]][IMM1 as usize],
983	);
984	transmute(src:dst)
985	}
986
987	/// Zeroes the contents of all XMM or YMM registers.
988	///
989	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroall)
990	#[inline]
991	#[target_feature(enable = "avx")]
992	#[cfg_attr(test, assert_instr(vzeroall))]
993	#[stable(feature = "simd_x86", since = "1.27.0")]
994	pub unsafe fn _mm256_zeroall() {
995	vzeroall()
996	}
997
998	/// Zeroes the upper 128 bits of all YMM registers;
999	/// the lower 128-bits of the registers are unmodified.
1000	///
1001	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroupper)
1002	#[inline]
1003	#[target_feature(enable = "avx")]
1004	#[cfg_attr(test, assert_instr(vzeroupper))]
1005	#[stable(feature = "simd_x86", since = "1.27.0")]
1006	pub unsafe fn _mm256_zeroupper() {
1007	vzeroupper()
1008	}
1009
1010	/// Shuffles single-precision (32-bit) floating-point elements in `a`
1011	/// within 128-bit lanes using the control in `b`.
1012	///
1013	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar_ps)
1014	#[inline]
1015	#[target_feature(enable = "avx")]
1016	#[cfg_attr(test, assert_instr(vpermilps))]
1017	#[stable(feature = "simd_x86", since = "1.27.0")]
1018	pub unsafe fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 {
1019	vpermilps256(a, b:b.as_i32x8())
1020	}
1021
1022	/// Shuffles single-precision (32-bit) floating-point elements in `a`
1023	/// using the control in `b`.
1024	///
1025	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutevar_ps)
1026	#[inline]
1027	#[target_feature(enable = "avx")]
1028	#[cfg_attr(test, assert_instr(vpermilps))]
1029	#[stable(feature = "simd_x86", since = "1.27.0")]
1030	pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
1031	vpermilps(a, b:b.as_i32x4())
1032	}
1033
1034	/// Shuffles single-precision (32-bit) floating-point elements in `a`
1035	/// within 128-bit lanes using the control in `imm8`.
1036	///
1037	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute_ps)
1038	#[inline]
1039	#[target_feature(enable = "avx")]
1040	#[cfg_attr(test, assert_instr(vshufps, IMM8 = `9`))]
1041	#[rustc_legacy_const_generics(`1`)]
1042	#[stable(feature = "simd_x86", since = "1.27.0")]
1043	pub unsafe fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256 {
1044	static_assert_uimm_bits!(IMM8, `8`);
1045	simd_shuffle!(
1046	a,
1047	_mm256_undefined_ps(),
1048	[
1049	(IMM8 as u32 >> `0`) & `0b11`,
1050	(IMM8 as u32 >> `2`) & `0b11`,
1051	(IMM8 as u32 >> `4`) & `0b11`,
1052	(IMM8 as u32 >> `6`) & `0b11`,
1053	((IMM8 as u32 >> `0`) & `0b11`) + `4`,
1054	((IMM8 as u32 >> `2`) & `0b11`) + `4`,
1055	((IMM8 as u32 >> `4`) & `0b11`) + `4`,
1056	((IMM8 as u32 >> `6`) & `0b11`) + `4`,
1057	],
1058	)
1059	}
1060
1061	/// Shuffles single-precision (32-bit) floating-point elements in `a`
1062	/// using the control in `imm8`.
1063	///
1064	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permute_ps)
1065	#[inline]
1066	#[target_feature(enable = "avx,sse")]
1067	#[cfg_attr(test, assert_instr(vshufps, IMM8 = `9`))]
1068	#[rustc_legacy_const_generics(`1`)]
1069	#[stable(feature = "simd_x86", since = "1.27.0")]
1070	pub unsafe fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128 {
1071	static_assert_uimm_bits!(IMM8, `8`);
1072	simd_shuffle!(
1073	a,
1074	_mm_undefined_ps(),
1075	[
1076	(IMM8 as u32 >> `0`) & `0b11`,
1077	(IMM8 as u32 >> `2`) & `0b11`,
1078	(IMM8 as u32 >> `4`) & `0b11`,
1079	(IMM8 as u32 >> `6`) & `0b11`,
1080	],
1081	)
1082	}
1083
1084	/// Shuffles double-precision (64-bit) floating-point elements in `a`
1085	/// within 256-bit lanes using the control in `b`.
1086	///
1087	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar_pd)
1088	#[inline]
1089	#[target_feature(enable = "avx")]
1090	#[cfg_attr(test, assert_instr(vpermilpd))]
1091	#[stable(feature = "simd_x86", since = "1.27.0")]
1092	pub unsafe fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d {
1093	vpermilpd256(a, b:b.as_i64x4())
1094	}
1095
1096	/// Shuffles double-precision (64-bit) floating-point elements in `a`
1097	/// using the control in `b`.
1098	///
1099	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutevar_pd)
1100	#[inline]
1101	#[target_feature(enable = "avx")]
1102	#[cfg_attr(test, assert_instr(vpermilpd))]
1103	#[stable(feature = "simd_x86", since = "1.27.0")]
1104	pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
1105	vpermilpd(a, b:b.as_i64x2())
1106	}
1107
1108	/// Shuffles double-precision (64-bit) floating-point elements in `a`
1109	/// within 128-bit lanes using the control in `imm8`.
1110	///
1111	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute_pd)
1112	#[inline]
1113	#[target_feature(enable = "avx")]
1114	#[cfg_attr(test, assert_instr(vshufpd, IMM4 = `0x1`))]
1115	#[rustc_legacy_const_generics(`1`)]
1116	#[stable(feature = "simd_x86", since = "1.27.0")]
1117	pub unsafe fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d {
1118	static_assert_uimm_bits!(IMM4, `4`);
1119	simd_shuffle!(
1120	a,
1121	_mm256_undefined_pd(),
1122	[
1123	((IMM4 as u32 >> `0`) & `1`),
1124	((IMM4 as u32 >> `1`) & `1`),
1125	((IMM4 as u32 >> `2`) & `1`) + `2`,
1126	((IMM4 as u32 >> `3`) & `1`) + `2`,
1127	],
1128	)
1129	}
1130
1131	/// Shuffles double-precision (64-bit) floating-point elements in `a`
1132	/// using the control in `imm8`.
1133	///
1134	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permute_pd)
1135	#[inline]
1136	#[target_feature(enable = "avx,sse2")]
1137	#[cfg_attr(test, assert_instr(vshufpd, IMM2 = `0x1`))]
1138	#[rustc_legacy_const_generics(`1`)]
1139	#[stable(feature = "simd_x86", since = "1.27.0")]
1140	pub unsafe fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d {
1141	static_assert_uimm_bits!(IMM2, `2`);
1142	simd_shuffle!(
1143	a,
1144	_mm_undefined_pd(),
1145	[(IMM2 as u32) & `1`, (IMM2 as u32 >> `1`) & `1`],
1146	)
1147	}
1148
1149	/// Shuffles 256 bits (composed of 8 packed single-precision (32-bit)
1150	/// floating-point elements) selected by `imm8` from `a` and `b`.
1151	///
1152	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_ps)
1153	#[inline]
1154	#[target_feature(enable = "avx")]
1155	#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = `0x5`))]
1156	#[rustc_legacy_const_generics(`2`)]
1157	#[stable(feature = "simd_x86", since = "1.27.0")]
1158	pub unsafe fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
1159	static_assert_uimm_bits!(IMM8, `8`);
1160	vperm2f128ps256(a, b, IMM8 as i8)
1161	}
1162
1163	/// Shuffles 256 bits (composed of 4 packed double-precision (64-bit)
1164	/// floating-point elements) selected by `imm8` from `a` and `b`.
1165	///
1166	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_pd)
1167	#[inline]
1168	#[target_feature(enable = "avx")]
1169	#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = `0x31`))]
1170	#[rustc_legacy_const_generics(`2`)]
1171	#[stable(feature = "simd_x86", since = "1.27.0")]
1172	pub unsafe fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
1173	static_assert_uimm_bits!(IMM8, `8`);
1174	vperm2f128pd256(a, b, IMM8 as i8)
1175	}
1176
1177	/// Shuffles 128-bits (composed of integer data) selected by `imm8`
1178	/// from `a` and `b`.
1179	///
1180	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_si256)
1181	#[inline]
1182	#[target_feature(enable = "avx")]
1183	#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = `0x31`))]
1184	#[rustc_legacy_const_generics(`2`)]
1185	#[stable(feature = "simd_x86", since = "1.27.0")]
1186	pub unsafe fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1187	static_assert_uimm_bits!(IMM8, `8`);
1188	transmute(src:vperm2f128si256(a:a.as_i32x8(), b:b.as_i32x8(), IMM8 as i8))
1189	}
1190
1191	/// Broadcasts a single-precision (32-bit) floating-point element from memory
1192	/// to all elements of the returned vector.
1193	///
1194	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_ss)
1195	#[inline]
1196	#[target_feature(enable = "avx")]
1197	#[cfg_attr(test, assert_instr(vbroadcastss))]
1198	#[stable(feature = "simd_x86", since = "1.27.0")]
1199	#[allow(clippy::trivially_copy_pass_by_ref)]
1200	pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 {
1201	_mm256_set1_ps(*f)
1202	}
1203
1204	/// Broadcasts a single-precision (32-bit) floating-point element from memory
1205	/// to all elements of the returned vector.
1206	///
1207	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcast_ss)
1208	#[inline]
1209	#[target_feature(enable = "avx")]
1210	#[cfg_attr(test, assert_instr(vbroadcastss))]
1211	#[stable(feature = "simd_x86", since = "1.27.0")]
1212	#[allow(clippy::trivially_copy_pass_by_ref)]
1213	pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 {
1214	_mm_set1_ps(*f)
1215	}
1216
1217	/// Broadcasts a double-precision (64-bit) floating-point element from memory
1218	/// to all elements of the returned vector.
1219	///
1220	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_sd)
1221	#[inline]
1222	#[target_feature(enable = "avx")]
1223	#[cfg_attr(test, assert_instr(vbroadcastsd))]
1224	#[stable(feature = "simd_x86", since = "1.27.0")]
1225	#[allow(clippy::trivially_copy_pass_by_ref)]
1226	pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d {
1227	_mm256_set1_pd(*f)
1228	}
1229
1230	/// Broadcasts 128 bits from memory (composed of 4 packed single-precision
1231	/// (32-bit) floating-point elements) to all elements of the returned vector.
1232	///
1233	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_ps)
1234	#[inline]
1235	#[target_feature(enable = "avx")]
1236	#[cfg_attr(test, assert_instr(vbroadcastf128))]
1237	#[stable(feature = "simd_x86", since = "1.27.0")]
1238	pub unsafe fn _mm256_broadcast_ps(a: &__m128) -> __m256 {
1239	simd_shuffle!(*a, _mm_setzero_ps(), [`0`, `1`, `2`, `3`, `0`, `1`, `2`, `3`])
1240	}
1241
1242	/// Broadcasts 128 bits from memory (composed of 2 packed double-precision
1243	/// (64-bit) floating-point elements) to all elements of the returned vector.
1244	///
1245	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_pd)
1246	#[inline]
1247	#[target_feature(enable = "avx")]
1248	#[cfg_attr(test, assert_instr(vbroadcastf128))]
1249	#[stable(feature = "simd_x86", since = "1.27.0")]
1250	pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
1251	simd_shuffle!(*a, _mm_setzero_pd(), [`0`, `1`, `0`, `1`])
1252	}
1253
1254	/// Copies `a` to result, then inserts 128 bits (composed of 4 packed
1255	/// single-precision (32-bit) floating-point elements) from `b` into result
1256	/// at the location specified by `imm8`.
1257	///
1258	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf128_ps)
1259	#[inline]
1260	#[target_feature(enable = "avx")]
1261	#[cfg_attr(
1262	all(test, not(target_os = "windows")),
1263	assert_instr(vinsertf128, IMM1 = `1`)
1264	)]
1265	#[rustc_legacy_const_generics(`2`)]
1266	#[stable(feature = "simd_x86", since = "1.27.0")]
1267	pub unsafe fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 {
1268	static_assert_uimm_bits!(IMM1, `1`);
1269	simd_shuffle!(
1270	a,
1271	_mm256_castps128_ps256(b),
1272	[[`8`, `9`, `10`, `11`, `4`, `5`, `6`, `7`], [`0`, `1`, `2`, `3`, `8`, `9`, `10`, `11`]][IMM1 as usize],
1273	)
1274	}
1275
1276	/// Copies `a` to result, then inserts 128 bits (composed of 2 packed
1277	/// double-precision (64-bit) floating-point elements) from `b` into result
1278	/// at the location specified by `imm8`.
1279	///
1280	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf128_pd)
1281	#[inline]
1282	#[target_feature(enable = "avx")]
1283	#[cfg_attr(
1284	all(test, not(target_os = "windows")),
1285	assert_instr(vinsertf128, IMM1 = `1`)
1286	)]
1287	#[rustc_legacy_const_generics(`2`)]
1288	#[stable(feature = "simd_x86", since = "1.27.0")]
1289	pub unsafe fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d {
1290	static_assert_uimm_bits!(IMM1, `1`);
1291	simd_shuffle!(
1292	a,
1293	_mm256_castpd128_pd256(b),
1294	[[`4`, `5`, `2`, `3`], [`0`, `1`, `4`, `5`]][IMM1 as usize],
1295	)
1296	}
1297
1298	/// Copies `a` to result, then inserts 128 bits from `b` into result
1299	/// at the location specified by `imm8`.
1300	///
1301	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf128_si256)
1302	#[inline]
1303	#[target_feature(enable = "avx")]
1304	#[cfg_attr(
1305	all(test, not(target_os = "windows")),
1306	assert_instr(vinsertf128, IMM1 = `1`)
1307	)]
1308	#[rustc_legacy_const_generics(`2`)]
1309	#[stable(feature = "simd_x86", since = "1.27.0")]
1310	pub unsafe fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
1311	static_assert_uimm_bits!(IMM1, `1`);
1312	let dst: i64x4 = simd_shuffle!(
1313	a.as_i64x4(),
1314	_mm256_castsi128_si256(b).as_i64x4(),
1315	[[`4`, `5`, `2`, `3`], [`0`, `1`, `4`, `5`]][IMM1 as usize],
1316	);
1317	transmute(src:dst)
1318	}
1319
1320	/// Copies `a` to result, and inserts the 8-bit integer `i` into result
1321	/// at the location specified by `index`.
1322	///
1323	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insert_epi8)
1324	#[inline]
1325	#[target_feature(enable = "avx")]
1326	// This intrinsic has no corresponding instruction.
1327	#[rustc_legacy_const_generics(`2`)]
1328	#[stable(feature = "simd_x86", since = "1.27.0")]
1329	pub unsafe fn _mm256_insert_epi8<const INDEX: i32>(a: __m256i, i: i8) -> __m256i {
1330	static_assert_uimm_bits!(INDEX, `5`);
1331	transmute(src:simd_insert!(a.as_i8x32(), INDEX as u32, i))
1332	}
1333
1334	/// Copies `a` to result, and inserts the 16-bit integer `i` into result
1335	/// at the location specified by `index`.
1336	///
1337	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insert_epi16)
1338	#[inline]
1339	#[target_feature(enable = "avx")]
1340	// This intrinsic has no corresponding instruction.
1341	#[rustc_legacy_const_generics(`2`)]
1342	#[stable(feature = "simd_x86", since = "1.27.0")]
1343	pub unsafe fn _mm256_insert_epi16<const INDEX: i32>(a: __m256i, i: i16) -> __m256i {
1344	static_assert_uimm_bits!(INDEX, `4`);
1345	transmute(src:simd_insert!(a.as_i16x16(), INDEX as u32, i))
1346	}
1347
1348	/// Copies `a` to result, and inserts the 32-bit integer `i` into result
1349	/// at the location specified by `index`.
1350	///
1351	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insert_epi32)
1352	#[inline]
1353	#[target_feature(enable = "avx")]
1354	// This intrinsic has no corresponding instruction.
1355	#[rustc_legacy_const_generics(`2`)]
1356	#[stable(feature = "simd_x86", since = "1.27.0")]
1357	pub unsafe fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m256i {
1358	static_assert_uimm_bits!(INDEX, `3`);
1359	transmute(src:simd_insert!(a.as_i32x8(), INDEX as u32, i))
1360	}
1361
1362	/// Loads 256-bits (composed of 4 packed double-precision (64-bit)
1363	/// floating-point elements) from memory into result.
1364	/// `mem_addr` must be aligned on a 32-byte boundary or a
1365	/// general-protection exception may be generated.
1366	///
1367	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_pd)
1368	#[inline]
1369	#[target_feature(enable = "avx")]
1370	#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected
1371	#[stable(feature = "simd_x86", since = "1.27.0")]
1372	#[allow(clippy::cast_ptr_alignment)]
1373	pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d {
1374	(mem_addr as const __m256d)
1375	}
1376
1377	/// Stores 256-bits (composed of 4 packed double-precision (64-bit)
1378	/// floating-point elements) from `a` into memory.
1379	/// `mem_addr` must be aligned on a 32-byte boundary or a
1380	/// general-protection exception may be generated.
1381	///
1382	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_pd)
1383	#[inline]
1384	#[target_feature(enable = "avx")]
1385	#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected
1386	#[stable(feature = "simd_x86", since = "1.27.0")]
1387	#[allow(clippy::cast_ptr_alignment)]
1388	pub unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d) {
1389	(mem_addr as mut __m256d) = a;
1390	}
1391
1392	/// Loads 256-bits (composed of 8 packed single-precision (32-bit)
1393	/// floating-point elements) from memory into result.
1394	/// `mem_addr` must be aligned on a 32-byte boundary or a
1395	/// general-protection exception may be generated.
1396	///
1397	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_ps)
1398	#[inline]
1399	#[target_feature(enable = "avx")]
1400	#[cfg_attr(test, assert_instr(vmovaps))]
1401	#[stable(feature = "simd_x86", since = "1.27.0")]
1402	#[allow(clippy::cast_ptr_alignment)]
1403	pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 {
1404	(mem_addr as const __m256)
1405	}
1406
1407	/// Stores 256-bits (composed of 8 packed single-precision (32-bit)
1408	/// floating-point elements) from `a` into memory.
1409	/// `mem_addr` must be aligned on a 32-byte boundary or a
1410	/// general-protection exception may be generated.
1411	///
1412	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_ps)
1413	#[inline]
1414	#[target_feature(enable = "avx")]
1415	#[cfg_attr(test, assert_instr(vmovaps))]
1416	#[stable(feature = "simd_x86", since = "1.27.0")]
1417	#[allow(clippy::cast_ptr_alignment)]
1418	pub unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256) {
1419	(mem_addr as mut __m256) = a;
1420	}
1421
1422	/// Loads 256-bits (composed of 4 packed double-precision (64-bit)
1423	/// floating-point elements) from memory into result.
1424	/// `mem_addr` does not need to be aligned on any particular boundary.
1425	///
1426	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_pd)
1427	#[inline]
1428	#[target_feature(enable = "avx")]
1429	#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected
1430	#[stable(feature = "simd_x86", since = "1.27.0")]
1431	pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d {
1432	let mut dst: __m256d = _mm256_undefined_pd();
1433	ptr::copy_nonoverlapping(
1434	src:mem_addr as *const u8,
1435	dst:ptr::addr_of_mut!(dst) as *mut u8,
1436	count:mem::size_of::<__m256d>(),
1437	);
1438	dst
1439	}
1440
1441	/// Stores 256-bits (composed of 4 packed double-precision (64-bit)
1442	/// floating-point elements) from `a` into memory.
1443	/// `mem_addr` does not need to be aligned on any particular boundary.
1444	///
1445	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_pd)
1446	#[inline]
1447	#[target_feature(enable = "avx")]
1448	#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected
1449	#[stable(feature = "simd_x86", since = "1.27.0")]
1450	pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) {
1451	mem_addr.cast::<__m256d>().write_unaligned(val:a);
1452	}
1453
1454	/// Loads 256-bits (composed of 8 packed single-precision (32-bit)
1455	/// floating-point elements) from memory into result.
1456	/// `mem_addr` does not need to be aligned on any particular boundary.
1457	///
1458	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_ps)
1459	#[inline]
1460	#[target_feature(enable = "avx")]
1461	#[cfg_attr(test, assert_instr(vmovups))]
1462	#[stable(feature = "simd_x86", since = "1.27.0")]
1463	pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 {
1464	let mut dst: __m256 = _mm256_undefined_ps();
1465	ptr::copy_nonoverlapping(
1466	src:mem_addr as *const u8,
1467	dst:ptr::addr_of_mut!(dst) as *mut u8,
1468	count:mem::size_of::<__m256>(),
1469	);
1470	dst
1471	}
1472
1473	/// Stores 256-bits (composed of 8 packed single-precision (32-bit)
1474	/// floating-point elements) from `a` into memory.
1475	/// `mem_addr` does not need to be aligned on any particular boundary.
1476	///
1477	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_ps)
1478	#[inline]
1479	#[target_feature(enable = "avx")]
1480	#[cfg_attr(test, assert_instr(vmovups))]
1481	#[stable(feature = "simd_x86", since = "1.27.0")]
1482	pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) {
1483	mem_addr.cast::<__m256>().write_unaligned(val:a);
1484	}
1485
1486	/// Loads 256-bits of integer data from memory into result.
1487	/// `mem_addr` must be aligned on a 32-byte boundary or a
1488	/// general-protection exception may be generated.
1489	///
1490	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_si256)
1491	#[inline]
1492	#[target_feature(enable = "avx")]
1493	#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected
1494	#[stable(feature = "simd_x86", since = "1.27.0")]
1495	pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i {
1496	*mem_addr
1497	}
1498
1499	/// Stores 256-bits of integer data from `a` into memory.
1500	/// `mem_addr` must be aligned on a 32-byte boundary or a
1501	/// general-protection exception may be generated.
1502	///
1503	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_si256)
1504	#[inline]
1505	#[target_feature(enable = "avx")]
1506	#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected
1507	#[stable(feature = "simd_x86", since = "1.27.0")]
1508	pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) {
1509	*mem_addr = a;
1510	}
1511
1512	/// Loads 256-bits of integer data from memory into result.
1513	/// `mem_addr` does not need to be aligned on any particular boundary.
1514	///
1515	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_si256)
1516	#[inline]
1517	#[target_feature(enable = "avx")]
1518	#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected
1519	#[stable(feature = "simd_x86", since = "1.27.0")]
1520	pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i {
1521	let mut dst: __m256i = _mm256_undefined_si256();
1522	ptr::copy_nonoverlapping(
1523	src:mem_addr as *const u8,
1524	dst:ptr::addr_of_mut!(dst) as *mut u8,
1525	count:mem::size_of::<__m256i>(),
1526	);
1527	dst
1528	}
1529
1530	/// Stores 256-bits of integer data from `a` into memory.
1531	/// `mem_addr` does not need to be aligned on any particular boundary.
1532	///
1533	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_si256)
1534	#[inline]
1535	#[target_feature(enable = "avx")]
1536	#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected
1537	#[stable(feature = "simd_x86", since = "1.27.0")]
1538	pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
1539	mem_addr.write_unaligned(val:a);
1540	}
1541
1542	/// Loads packed double-precision (64-bit) floating-point elements from memory
1543	/// into result using `mask` (elements are zeroed out when the high bit of the
1544	/// corresponding element is not set).
1545	///
1546	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_pd)
1547	#[inline]
1548	#[target_feature(enable = "avx")]
1549	#[cfg_attr(test, assert_instr(vmaskmovpd))]
1550	#[stable(feature = "simd_x86", since = "1.27.0")]
1551	pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d {
1552	maskloadpd256(mem_addr as *const i8, mask:mask.as_i64x4())
1553	}
1554
1555	/// Stores packed double-precision (64-bit) floating-point elements from `a`
1556	/// into memory using `mask`.
1557	///
1558	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_pd)
1559	#[inline]
1560	#[target_feature(enable = "avx")]
1561	#[cfg_attr(test, assert_instr(vmaskmovpd))]
1562	#[stable(feature = "simd_x86", since = "1.27.0")]
1563	pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) {
1564	maskstorepd256(mem_addr as *mut i8, mask:mask.as_i64x4(), a);
1565	}
1566
1567	/// Loads packed double-precision (64-bit) floating-point elements from memory
1568	/// into result using `mask` (elements are zeroed out when the high bit of the
1569	/// corresponding element is not set).
1570	///
1571	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_pd)
1572	#[inline]
1573	#[target_feature(enable = "avx")]
1574	#[cfg_attr(test, assert_instr(vmaskmovpd))]
1575	#[stable(feature = "simd_x86", since = "1.27.0")]
1576	pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
1577	maskloadpd(mem_addr as *const i8, mask:mask.as_i64x2())
1578	}
1579
1580	/// Stores packed double-precision (64-bit) floating-point elements from `a`
1581	/// into memory using `mask`.
1582	///
1583	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_pd)
1584	#[inline]
1585	#[target_feature(enable = "avx")]
1586	#[cfg_attr(test, assert_instr(vmaskmovpd))]
1587	#[stable(feature = "simd_x86", since = "1.27.0")]
1588	pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
1589	maskstorepd(mem_addr as *mut i8, mask:mask.as_i64x2(), a);
1590	}
1591
1592	/// Loads packed single-precision (32-bit) floating-point elements from memory
1593	/// into result using `mask` (elements are zeroed out when the high bit of the
1594	/// corresponding element is not set).
1595	///
1596	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_ps)
1597	#[inline]
1598	#[target_feature(enable = "avx")]
1599	#[cfg_attr(test, assert_instr(vmaskmovps))]
1600	#[stable(feature = "simd_x86", since = "1.27.0")]
1601	pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 {
1602	maskloadps256(mem_addr as *const i8, mask:mask.as_i32x8())
1603	}
1604
1605	/// Stores packed single-precision (32-bit) floating-point elements from `a`
1606	/// into memory using `mask`.
1607	///
1608	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_ps)
1609	#[inline]
1610	#[target_feature(enable = "avx")]
1611	#[cfg_attr(test, assert_instr(vmaskmovps))]
1612	#[stable(feature = "simd_x86", since = "1.27.0")]
1613	pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) {
1614	maskstoreps256(mem_addr as *mut i8, mask:mask.as_i32x8(), a);
1615	}
1616
1617	/// Loads packed single-precision (32-bit) floating-point elements from memory
1618	/// into result using `mask` (elements are zeroed out when the high bit of the
1619	/// corresponding element is not set).
1620	///
1621	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_ps)
1622	#[inline]
1623	#[target_feature(enable = "avx")]
1624	#[cfg_attr(test, assert_instr(vmaskmovps))]
1625	#[stable(feature = "simd_x86", since = "1.27.0")]
1626	pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
1627	maskloadps(mem_addr as *const i8, mask:mask.as_i32x4())
1628	}
1629
1630	/// Stores packed single-precision (32-bit) floating-point elements from `a`
1631	/// into memory using `mask`.
1632	///
1633	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_ps)
1634	#[inline]
1635	#[target_feature(enable = "avx")]
1636	#[cfg_attr(test, assert_instr(vmaskmovps))]
1637	#[stable(feature = "simd_x86", since = "1.27.0")]
1638	pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
1639	maskstoreps(mem_addr as *mut i8, mask:mask.as_i32x4(), a);
1640	}
1641
1642	/// Duplicate odd-indexed single-precision (32-bit) floating-point elements
1643	/// from `a`, and returns the results.
1644	///
1645	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movehdup_ps)
1646	#[inline]
1647	#[target_feature(enable = "avx")]
1648	#[cfg_attr(test, assert_instr(vmovshdup))]
1649	#[stable(feature = "simd_x86", since = "1.27.0")]
1650	pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 {
1651	simd_shuffle!(a, a, [`1`, `1`, `3`, `3`, `5`, `5`, `7`, `7`])
1652	}
1653
1654	/// Duplicate even-indexed single-precision (32-bit) floating-point elements
1655	/// from `a`, and returns the results.
1656	///
1657	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_moveldup_ps)
1658	#[inline]
1659	#[target_feature(enable = "avx")]
1660	#[cfg_attr(test, assert_instr(vmovsldup))]
1661	#[stable(feature = "simd_x86", since = "1.27.0")]
1662	pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 {
1663	simd_shuffle!(a, a, [`0`, `0`, `2`, `2`, `4`, `4`, `6`, `6`])
1664	}
1665
1666	/// Duplicate even-indexed double-precision (64-bit) floating-point elements
1667	/// from `a`, and returns the results.
1668	///
1669	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movedup_pd)
1670	#[inline]
1671	#[target_feature(enable = "avx")]
1672	#[cfg_attr(test, assert_instr(vmovddup))]
1673	#[stable(feature = "simd_x86", since = "1.27.0")]
1674	pub unsafe fn _mm256_movedup_pd(a: __m256d) -> __m256d {
1675	simd_shuffle!(a, a, [`0`, `0`, `2`, `2`])
1676	}
1677
1678	/// Loads 256-bits of integer data from unaligned memory into result.
1679	/// This intrinsic may perform better than `_mm256_loadu_si256` when the
1680	/// data crosses a cache line boundary.
1681	///
1682	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_lddqu_si256)
1683	#[inline]
1684	#[target_feature(enable = "avx")]
1685	#[cfg_attr(test, assert_instr(vlddqu))]
1686	#[stable(feature = "simd_x86", since = "1.27.0")]
1687	pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
1688	transmute(src:vlddqu(mem_addr as *const i8))
1689	}
1690
1691	/// Moves integer data from a 256-bit integer vector to a 32-byte
1692	/// aligned memory location. To minimize caching, the data is flagged as
1693	/// non-temporal (unlikely to be used again soon)
1694	///
1695	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_si256)
1696	///
1697	/// # Safety of non-temporal stores
1698	///
1699	/// After using this intrinsic, but before any other access to the memory that this intrinsic
1700	/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
1701	/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
1702	/// return.
1703	///
1704	/// See [`_mm_sfence`] for details.
1705	#[inline]
1706	#[target_feature(enable = "avx")]
1707	#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntdq
1708	#[stable(feature = "simd_x86", since = "1.27.0")]
1709	pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) {
1710	intrinsics::nontemporal_store(ptr:mem_addr, val:a);
1711	}
1712
1713	/// Moves double-precision values from a 256-bit vector of `[4 x double]`
1714	/// to a 32-byte aligned memory location. To minimize caching, the data is
1715	/// flagged as non-temporal (unlikely to be used again soon).
1716	///
1717	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_pd)
1718	///
1719	/// # Safety of non-temporal stores
1720	///
1721	/// After using this intrinsic, but before any other access to the memory that this intrinsic
1722	/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
1723	/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
1724	/// return.
1725	///
1726	/// See [`_mm_sfence`] for details.
1727	#[inline]
1728	#[target_feature(enable = "avx")]
1729	#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntpd
1730	#[stable(feature = "simd_x86", since = "1.27.0")]
1731	#[allow(clippy::cast_ptr_alignment)]
1732	pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) {
1733	intrinsics::nontemporal_store(ptr:mem_addr as *mut __m256d, val:a);
1734	}
1735
1736	/// Moves single-precision floating point values from a 256-bit vector
1737	/// of `[8 x float]` to a 32-byte aligned memory location. To minimize
1738	/// caching, the data is flagged as non-temporal (unlikely to be used again
1739	/// soon).
1740	///
1741	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_ps)
1742	///
1743	/// # Safety of non-temporal stores
1744	///
1745	/// After using this intrinsic, but before any other access to the memory that this intrinsic
1746	/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
1747	/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
1748	/// return.
1749	///
1750	/// See [`_mm_sfence`] for details.
1751	#[inline]
1752	#[target_feature(enable = "avx")]
1753	#[cfg_attr(test, assert_instr(vmovntps))]
1754	#[stable(feature = "simd_x86", since = "1.27.0")]
1755	#[allow(clippy::cast_ptr_alignment)]
1756	pub unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256) {
1757	intrinsics::nontemporal_store(ptr:mem_addr as *mut __m256, val:a);
1758	}
1759
1760	/// Computes the approximate reciprocal of packed single-precision (32-bit)
1761	/// floating-point elements in `a`, and returns the results. The maximum
1762	/// relative error for this approximation is less than 1.52^-12.*
1763	///
1764	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp_ps)
1765	#[inline]
1766	#[target_feature(enable = "avx")]
1767	#[cfg_attr(test, assert_instr(vrcpps))]
1768	#[stable(feature = "simd_x86", since = "1.27.0")]
1769	pub unsafe fn _mm256_rcp_ps(a: __m256) -> __m256 {
1770	vrcpps(a)
1771	}
1772
1773	/// Computes the approximate reciprocal square root of packed single-precision
1774	/// (32-bit) floating-point elements in `a`, and returns the results.
1775	/// The maximum relative error for this approximation is less than 1.52^-12.*
1776	///
1777	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt_ps)
1778	#[inline]
1779	#[target_feature(enable = "avx")]
1780	#[cfg_attr(test, assert_instr(vrsqrtps))]
1781	#[stable(feature = "simd_x86", since = "1.27.0")]
1782	pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 {
1783	vrsqrtps(a)
1784	}
1785
1786	/// Unpacks and interleave double-precision (64-bit) floating-point elements
1787	/// from the high half of each 128-bit lane in `a` and `b`.
1788	///
1789	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_pd)
1790	#[inline]
1791	#[target_feature(enable = "avx")]
1792	#[cfg_attr(test, assert_instr(vunpckhpd))]
1793	#[stable(feature = "simd_x86", since = "1.27.0")]
1794	pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
1795	simd_shuffle!(a, b, [`1`, `5`, `3`, `7`])
1796	}
1797
1798	/// Unpacks and interleave single-precision (32-bit) floating-point elements
1799	/// from the high half of each 128-bit lane in `a` and `b`.
1800	///
1801	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_ps)
1802	#[inline]
1803	#[target_feature(enable = "avx")]
1804	#[cfg_attr(test, assert_instr(vunpckhps))]
1805	#[stable(feature = "simd_x86", since = "1.27.0")]
1806	pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
1807	simd_shuffle!(a, b, [`2`, `10`, `3`, `11`, `6`, `14`, `7`, `15`])
1808	}
1809
1810	/// Unpacks and interleave double-precision (64-bit) floating-point elements
1811	/// from the low half of each 128-bit lane in `a` and `b`.
1812	///
1813	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_pd)
1814	#[inline]
1815	#[target_feature(enable = "avx")]
1816	#[cfg_attr(test, assert_instr(vunpcklpd))]
1817	#[stable(feature = "simd_x86", since = "1.27.0")]
1818	pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
1819	simd_shuffle!(a, b, [`0`, `4`, `2`, `6`])
1820	}
1821
1822	/// Unpacks and interleave single-precision (32-bit) floating-point elements
1823	/// from the low half of each 128-bit lane in `a` and `b`.
1824	///
1825	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_ps)
1826	#[inline]
1827	#[target_feature(enable = "avx")]
1828	#[cfg_attr(test, assert_instr(vunpcklps))]
1829	#[stable(feature = "simd_x86", since = "1.27.0")]
1830	pub unsafe fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
1831	simd_shuffle!(a, b, [`0`, `8`, `1`, `9`, `4`, `12`, `5`, `13`])
1832	}
1833
1834	/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
1835	/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0.
1836	/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
1837	/// the result is zero, otherwise set `CF` to 0. Return the `ZF` value.
1838	///
1839	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_si256)
1840	#[inline]
1841	#[target_feature(enable = "avx")]
1842	#[cfg_attr(test, assert_instr(vptest))]
1843	#[stable(feature = "simd_x86", since = "1.27.0")]
1844	pub unsafe fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 {
1845	ptestz256(a:a.as_i64x4(), b:b.as_i64x4())
1846	}
1847
1848	/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
1849	/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0.
1850	/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
1851	/// the result is zero, otherwise set `CF` to 0. Return the `CF` value.
1852	///
1853	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testc_si256)
1854	#[inline]
1855	#[target_feature(enable = "avx")]
1856	#[cfg_attr(test, assert_instr(vptest))]
1857	#[stable(feature = "simd_x86", since = "1.27.0")]
1858	pub unsafe fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 {
1859	ptestc256(a:a.as_i64x4(), b:b.as_i64x4())
1860	}
1861
1862	/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
1863	/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0.
1864	/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
1865	/// the result is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and
1866	/// `CF` values are zero, otherwise return 0.
1867	///
1868	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_si256)
1869	#[inline]
1870	#[target_feature(enable = "avx")]
1871	#[cfg_attr(test, assert_instr(vptest))]
1872	#[stable(feature = "simd_x86", since = "1.27.0")]
1873	pub unsafe fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 {
1874	ptestnzc256(a:a.as_i64x4(), b:b.as_i64x4())
1875	}
1876
1877	/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit)
1878	/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
1879	/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1880	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1881	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1882	/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1883	/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
1884	///
1885	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_pd)
1886	#[inline]
1887	#[target_feature(enable = "avx")]
1888	#[cfg_attr(test, assert_instr(vtestpd))]
1889	#[stable(feature = "simd_x86", since = "1.27.0")]
1890	pub unsafe fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 {
1891	vtestzpd256(a, b)
1892	}
1893
1894	/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit)
1895	/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
1896	/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1897	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1898	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1899	/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1900	/// is zero, otherwise set `CF` to 0. Return the `CF` value.
1901	///
1902	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testc_pd)
1903	#[inline]
1904	#[target_feature(enable = "avx")]
1905	#[cfg_attr(test, assert_instr(vtestpd))]
1906	#[stable(feature = "simd_x86", since = "1.27.0")]
1907	pub unsafe fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 {
1908	vtestcpd256(a, b)
1909	}
1910
1911	/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit)
1912	/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
1913	/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1914	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1915	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1916	/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1917	/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
1918	/// are zero, otherwise return 0.
1919	///
1920	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_pd)
1921	#[inline]
1922	#[target_feature(enable = "avx")]
1923	#[cfg_attr(test, assert_instr(vtestpd))]
1924	#[stable(feature = "simd_x86", since = "1.27.0")]
1925	pub unsafe fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 {
1926	vtestnzcpd256(a, b)
1927	}
1928
1929	/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit)
1930	/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
1931	/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1932	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1933	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1934	/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1935	/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
1936	///
1937	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testz_pd)
1938	#[inline]
1939	#[target_feature(enable = "avx")]
1940	#[cfg_attr(test, assert_instr(vtestpd))]
1941	#[stable(feature = "simd_x86", since = "1.27.0")]
1942	pub unsafe fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 {
1943	vtestzpd(a, b)
1944	}
1945
1946	/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit)
1947	/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
1948	/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1949	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1950	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1951	/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1952	/// is zero, otherwise set `CF` to 0. Return the `CF` value.
1953	///
1954	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testc_pd)
1955	#[inline]
1956	#[target_feature(enable = "avx")]
1957	#[cfg_attr(test, assert_instr(vtestpd))]
1958	#[stable(feature = "simd_x86", since = "1.27.0")]
1959	pub unsafe fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 {
1960	vtestcpd(a, b)
1961	}
1962
1963	/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit)
1964	/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
1965	/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1966	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1967	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1968	/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1969	/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
1970	/// are zero, otherwise return 0.
1971	///
1972	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_pd)
1973	#[inline]
1974	#[target_feature(enable = "avx")]
1975	#[cfg_attr(test, assert_instr(vtestpd))]
1976	#[stable(feature = "simd_x86", since = "1.27.0")]
1977	pub unsafe fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 {
1978	vtestnzcpd(a, b)
1979	}
1980
1981	/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit)
1982	/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
1983	/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
1984	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1985	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1986	/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
1987	/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
1988	///
1989	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_ps)
1990	#[inline]
1991	#[target_feature(enable = "avx")]
1992	#[cfg_attr(test, assert_instr(vtestps))]
1993	#[stable(feature = "simd_x86", since = "1.27.0")]
1994	pub unsafe fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 {
1995	vtestzps256(a, b)
1996	}
1997
1998	/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit)
1999	/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
2000	/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2001	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2002	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2003	/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2004	/// is zero, otherwise set `CF` to 0. Return the `CF` value.
2005	///
2006	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testc_ps)
2007	#[inline]
2008	#[target_feature(enable = "avx")]
2009	#[cfg_attr(test, assert_instr(vtestps))]
2010	#[stable(feature = "simd_x86", since = "1.27.0")]
2011	pub unsafe fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 {
2012	vtestcps256(a, b)
2013	}
2014
2015	/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit)
2016	/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
2017	/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2018	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2019	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2020	/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2021	/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
2022	/// are zero, otherwise return 0.
2023	///
2024	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_ps)
2025	#[inline]
2026	#[target_feature(enable = "avx")]
2027	#[cfg_attr(test, assert_instr(vtestps))]
2028	#[stable(feature = "simd_x86", since = "1.27.0")]
2029	pub unsafe fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 {
2030	vtestnzcps256(a, b)
2031	}
2032
2033	/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit)
2034	/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
2035	/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2036	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2037	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2038	/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2039	/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
2040	///
2041	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testz_ps)
2042	#[inline]
2043	#[target_feature(enable = "avx")]
2044	#[cfg_attr(test, assert_instr(vtestps))]
2045	#[stable(feature = "simd_x86", since = "1.27.0")]
2046	pub unsafe fn _mm_testz_ps(a: __m128, b: __m128) -> i32 {
2047	vtestzps(a, b)
2048	}
2049
2050	/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit)
2051	/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
2052	/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2053	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2054	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2055	/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2056	/// is zero, otherwise set `CF` to 0. Return the `CF` value.
2057	///
2058	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testc_ps)
2059	#[inline]
2060	#[target_feature(enable = "avx")]
2061	#[cfg_attr(test, assert_instr(vtestps))]
2062	#[stable(feature = "simd_x86", since = "1.27.0")]
2063	pub unsafe fn _mm_testc_ps(a: __m128, b: __m128) -> i32 {
2064	vtestcps(a, b)
2065	}
2066
2067	/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit)
2068	/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
2069	/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2070	/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2071	/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2072	/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2073	/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
2074	/// are zero, otherwise return 0.
2075	///
2076	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_ps)
2077	#[inline]
2078	#[target_feature(enable = "avx")]
2079	#[cfg_attr(test, assert_instr(vtestps))]
2080	#[stable(feature = "simd_x86", since = "1.27.0")]
2081	pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 {
2082	vtestnzcps(a, b)
2083	}
2084
2085	/// Sets each bit of the returned mask based on the most significant bit of the
2086	/// corresponding packed double-precision (64-bit) floating-point element in
2087	/// `a`.
2088	///
2089	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_pd)
2090	#[inline]
2091	#[target_feature(enable = "avx")]
2092	#[cfg_attr(test, assert_instr(vmovmskpd))]
2093	#[stable(feature = "simd_x86", since = "1.27.0")]
2094	pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {
2095	// Propagate the highest bit to the rest, because simd_bitmask
2096	// requires all-1 or all-0.
2097	let mask: i64x4 = simd_lt(x:transmute(a), y:i64x4::splat(`0`));
2098	simd_bitmask::<i64x4, u8>(mask).into()
2099	}
2100
2101	/// Sets each bit of the returned mask based on the most significant bit of the
2102	/// corresponding packed single-precision (32-bit) floating-point element in
2103	/// `a`.
2104	///
2105	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_ps)
2106	#[inline]
2107	#[target_feature(enable = "avx")]
2108	#[cfg_attr(test, assert_instr(vmovmskps))]
2109	#[stable(feature = "simd_x86", since = "1.27.0")]
2110	pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {
2111	// Propagate the highest bit to the rest, because simd_bitmask
2112	// requires all-1 or all-0.
2113	let mask: i32x8 = simd_lt(x:transmute(a), y:i32x8::splat(`0`));
2114	simd_bitmask::<i32x8, u8>(mask).into()
2115	}
2116
2117	/// Returns vector of type __m256d with all elements set to zero.
2118	///
2119	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_pd)
2120	#[inline]
2121	#[target_feature(enable = "avx")]
2122	#[cfg_attr(test, assert_instr(vxorps))] // FIXME vxorpd expected
2123	#[stable(feature = "simd_x86", since = "1.27.0")]
2124	pub unsafe fn _mm256_setzero_pd() -> __m256d {
2125	_mm256_set1_pd(`0.0`)
2126	}
2127
2128	/// Returns vector of type __m256 with all elements set to zero.
2129	///
2130	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_ps)
2131	#[inline]
2132	#[target_feature(enable = "avx")]
2133	#[cfg_attr(test, assert_instr(vxorps))]
2134	#[stable(feature = "simd_x86", since = "1.27.0")]
2135	pub unsafe fn _mm256_setzero_ps() -> __m256 {
2136	_mm256_set1_ps(`0.0`)
2137	}
2138
2139	/// Returns vector of type __m256i with all elements set to zero.
2140	///
2141	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_si256)
2142	#[inline]
2143	#[target_feature(enable = "avx")]
2144	#[cfg_attr(test, assert_instr(vxor))]
2145	#[stable(feature = "simd_x86", since = "1.27.0")]
2146	pub unsafe fn _mm256_setzero_si256() -> __m256i {
2147	_mm256_set1_epi8(`0`)
2148	}
2149
2150	/// Sets packed double-precision (64-bit) floating-point elements in returned
2151	/// vector with the supplied values.
2152	///
2153	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_pd)
2154	#[inline]
2155	#[target_feature(enable = "avx")]
2156	// This intrinsic has no corresponding instruction.
2157	#[cfg_attr(test, assert_instr(vinsertf128))]
2158	#[stable(feature = "simd_x86", since = "1.27.0")]
2159	pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
2160	_mm256_setr_pd(a:d, b:c, c:b, d:a)
2161	}
2162
2163	/// Sets packed single-precision (32-bit) floating-point elements in returned
2164	/// vector with the supplied values.
2165	///
2166	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_ps)
2167	#[inline]
2168	#[target_feature(enable = "avx")]
2169	// This intrinsic has no corresponding instruction.
2170	#[stable(feature = "simd_x86", since = "1.27.0")]
2171	pub unsafe fn _mm256_set_ps(
2172	a: f32,
2173	b: f32,
2174	c: f32,
2175	d: f32,
2176	e: f32,
2177	f: f32,
2178	g: f32,
2179	h: f32,
2180	) -> __m256 {
2181	_mm256_setr_ps(a:h, b:g, c:f, d:e, e:d, f:c, g:b, h:a)
2182	}
2183
2184	/// Sets packed 8-bit integers in returned vector with the supplied values.
2185	///
2186	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi8)
2187	#[inline]
2188	#[target_feature(enable = "avx")]
2189	// This intrinsic has no corresponding instruction.
2190	#[stable(feature = "simd_x86", since = "1.27.0")]
2191	pub unsafe fn _mm256_set_epi8(
2192	e00: i8,
2193	e01: i8,
2194	e02: i8,
2195	e03: i8,
2196	e04: i8,
2197	e05: i8,
2198	e06: i8,
2199	e07: i8,
2200	e08: i8,
2201	e09: i8,
2202	e10: i8,
2203	e11: i8,
2204	e12: i8,
2205	e13: i8,
2206	e14: i8,
2207	e15: i8,
2208	e16: i8,
2209	e17: i8,
2210	e18: i8,
2211	e19: i8,
2212	e20: i8,
2213	e21: i8,
2214	e22: i8,
2215	e23: i8,
2216	e24: i8,
2217	e25: i8,
2218	e26: i8,
2219	e27: i8,
2220	e28: i8,
2221	e29: i8,
2222	e30: i8,
2223	e31: i8,
2224	) -> __m256i {
2225	#[rustfmt::skip]
2226	_mm256_setr_epi8(
2227	e00:e31, e01:e30, e02:e29, e03:e28, e04:e27, e05:e26, e06:e25, e07:e24,
2228	e08:e23, e09:e22, e10:e21, e11:e20, e12:e19, e13:e18, e14:e17, e15:e16,
2229	e16:e15, e17:e14, e18:e13, e19:e12, e20:e11, e21:e10, e22:e09, e23:e08,
2230	e24:e07, e25:e06, e26:e05, e27:e04, e28:e03, e29:e02, e30:e01, e31:e00,
2231	)
2232	}
2233
2234	/// Sets packed 16-bit integers in returned vector with the supplied values.
2235	///
2236	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi16)
2237	#[inline]
2238	#[target_feature(enable = "avx")]
2239	// This intrinsic has no corresponding instruction.
2240	#[stable(feature = "simd_x86", since = "1.27.0")]
2241	pub unsafe fn _mm256_set_epi16(
2242	e00: i16,
2243	e01: i16,
2244	e02: i16,
2245	e03: i16,
2246	e04: i16,
2247	e05: i16,
2248	e06: i16,
2249	e07: i16,
2250	e08: i16,
2251	e09: i16,
2252	e10: i16,
2253	e11: i16,
2254	e12: i16,
2255	e13: i16,
2256	e14: i16,
2257	e15: i16,
2258	) -> __m256i {
2259	#[rustfmt::skip]
2260	_mm256_setr_epi16(
2261	e00:e15, e01:e14, e02:e13, e03:e12,
2262	e04:e11, e05:e10, e06:e09, e07:e08,
2263	e08:e07, e09:e06, e10:e05, e11:e04,
2264	e12:e03, e13:e02, e14:e01, e15:e00,
2265	)
2266	}
2267
2268	/// Sets packed 32-bit integers in returned vector with the supplied values.
2269	///
2270	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi32)
2271	#[inline]
2272	#[target_feature(enable = "avx")]
2273	// This intrinsic has no corresponding instruction.
2274	#[stable(feature = "simd_x86", since = "1.27.0")]
2275	pub unsafe fn _mm256_set_epi32(
2276	e0: i32,
2277	e1: i32,
2278	e2: i32,
2279	e3: i32,
2280	e4: i32,
2281	e5: i32,
2282	e6: i32,
2283	e7: i32,
2284	) -> __m256i {
2285	_mm256_setr_epi32(e0:e7, e1:e6, e2:e5, e3:e4, e4:e3, e5:e2, e6:e1, e7:e0)
2286	}
2287
2288	/// Sets packed 64-bit integers in returned vector with the supplied values.
2289	///
2290	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi64x)
2291	#[inline]
2292	#[target_feature(enable = "avx")]
2293	// This intrinsic has no corresponding instruction.
2294	#[stable(feature = "simd_x86", since = "1.27.0")]
2295	pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
2296	_mm256_setr_epi64x(a:d, b:c, c:b, d:a)
2297	}
2298
2299	/// Sets packed double-precision (64-bit) floating-point elements in returned
2300	/// vector with the supplied values in reverse order.
2301	///
2302	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_pd)
2303	#[inline]
2304	#[target_feature(enable = "avx")]
2305	// This intrinsic has no corresponding instruction.
2306	#[stable(feature = "simd_x86", since = "1.27.0")]
2307	pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
2308	__m256d(a, b, c, d)
2309	}
2310
2311	/// Sets packed single-precision (32-bit) floating-point elements in returned
2312	/// vector with the supplied values in reverse order.
2313	///
2314	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_ps)
2315	#[inline]
2316	#[target_feature(enable = "avx")]
2317	// This intrinsic has no corresponding instruction.
2318	#[stable(feature = "simd_x86", since = "1.27.0")]
2319	pub unsafe fn _mm256_setr_ps(
2320	a: f32,
2321	b: f32,
2322	c: f32,
2323	d: f32,
2324	e: f32,
2325	f: f32,
2326	g: f32,
2327	h: f32,
2328	) -> __m256 {
2329	__m256(a, b, c, d, e, f, g, h)
2330	}
2331
2332	/// Sets packed 8-bit integers in returned vector with the supplied values in
2333	/// reverse order.
2334	///
2335	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi8)
2336	#[inline]
2337	#[target_feature(enable = "avx")]
2338	// This intrinsic has no corresponding instruction.
2339	#[stable(feature = "simd_x86", since = "1.27.0")]
2340	pub unsafe fn _mm256_setr_epi8(
2341	e00: i8,
2342	e01: i8,
2343	e02: i8,
2344	e03: i8,
2345	e04: i8,
2346	e05: i8,
2347	e06: i8,
2348	e07: i8,
2349	e08: i8,
2350	e09: i8,
2351	e10: i8,
2352	e11: i8,
2353	e12: i8,
2354	e13: i8,
2355	e14: i8,
2356	e15: i8,
2357	e16: i8,
2358	e17: i8,
2359	e18: i8,
2360	e19: i8,
2361	e20: i8,
2362	e21: i8,
2363	e22: i8,
2364	e23: i8,
2365	e24: i8,
2366	e25: i8,
2367	e26: i8,
2368	e27: i8,
2369	e28: i8,
2370	e29: i8,
2371	e30: i8,
2372	e31: i8,
2373	) -> __m256i {
2374	#[rustfmt::skip]
2375	transmute(src:i8x32::new(
2376	x0:e00, x1:e01, x2:e02, x3:e03, x4:e04, x5:e05, x6:e06, x7:e07,
2377	x8:e08, x9:e09, x10:e10, x11:e11, x12:e12, x13:e13, x14:e14, x15:e15,
2378	x16:e16, x17:e17, x18:e18, x19:e19, x20:e20, x21:e21, x22:e22, x23:e23,
2379	x24:e24, x25:e25, x26:e26, x27:e27, x28:e28, x29:e29, x30:e30, x31:e31,
2380	))
2381	}
2382
2383	/// Sets packed 16-bit integers in returned vector with the supplied values in
2384	/// reverse order.
2385	///
2386	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi16)
2387	#[inline]
2388	#[target_feature(enable = "avx")]
2389	// This intrinsic has no corresponding instruction.
2390	#[stable(feature = "simd_x86", since = "1.27.0")]
2391	pub unsafe fn _mm256_setr_epi16(
2392	e00: i16,
2393	e01: i16,
2394	e02: i16,
2395	e03: i16,
2396	e04: i16,
2397	e05: i16,
2398	e06: i16,
2399	e07: i16,
2400	e08: i16,
2401	e09: i16,
2402	e10: i16,
2403	e11: i16,
2404	e12: i16,
2405	e13: i16,
2406	e14: i16,
2407	e15: i16,
2408	) -> __m256i {
2409	#[rustfmt::skip]
2410	transmute(src:i16x16::new(
2411	x0:e00, x1:e01, x2:e02, x3:e03,
2412	x4:e04, x5:e05, x6:e06, x7:e07,
2413	x8:e08, x9:e09, x10:e10, x11:e11,
2414	x12:e12, x13:e13, x14:e14, x15:e15,
2415	))
2416	}
2417
2418	/// Sets packed 32-bit integers in returned vector with the supplied values in
2419	/// reverse order.
2420	///
2421	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi32)
2422	#[inline]
2423	#[target_feature(enable = "avx")]
2424	// This intrinsic has no corresponding instruction.
2425	#[stable(feature = "simd_x86", since = "1.27.0")]
2426	pub unsafe fn _mm256_setr_epi32(
2427	e0: i32,
2428	e1: i32,
2429	e2: i32,
2430	e3: i32,
2431	e4: i32,
2432	e5: i32,
2433	e6: i32,
2434	e7: i32,
2435	) -> __m256i {
2436	transmute(src:i32x8::new(x0:e0, x1:e1, x2:e2, x3:e3, x4:e4, x5:e5, x6:e6, x7:e7))
2437	}
2438
2439	/// Sets packed 64-bit integers in returned vector with the supplied values in
2440	/// reverse order.
2441	///
2442	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi64x)
2443	#[inline]
2444	#[target_feature(enable = "avx")]
2445	// This intrinsic has no corresponding instruction.
2446	#[stable(feature = "simd_x86", since = "1.27.0")]
2447	pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
2448	transmute(src:i64x4::new(x0:a, x1:b, x2:c, x3:d))
2449	}
2450
2451	/// Broadcasts double-precision (64-bit) floating-point value `a` to all
2452	/// elements of returned vector.
2453	///
2454	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_pd)
2455	#[inline]
2456	#[target_feature(enable = "avx")]
2457	// This intrinsic has no corresponding instruction.
2458	#[stable(feature = "simd_x86", since = "1.27.0")]
2459	pub unsafe fn _mm256_set1_pd(a: f64) -> __m256d {
2460	_mm256_setr_pd(a, b:a, c:a, d:a)
2461	}
2462
2463	/// Broadcasts single-precision (32-bit) floating-point value `a` to all
2464	/// elements of returned vector.
2465	///
2466	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_ps)
2467	#[inline]
2468	#[target_feature(enable = "avx")]
2469	// This intrinsic has no corresponding instruction.
2470	#[stable(feature = "simd_x86", since = "1.27.0")]
2471	pub unsafe fn _mm256_set1_ps(a: f32) -> __m256 {
2472	_mm256_setr_ps(a, b:a, c:a, d:a, e:a, f:a, g:a, h:a)
2473	}
2474
2475	/// Broadcasts 8-bit integer `a` to all elements of returned vector.
2476	/// This intrinsic may generate the `vpbroadcastb`.
2477	///
2478	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi8)
2479	#[inline]
2480	#[target_feature(enable = "avx")]
2481	// This intrinsic has no corresponding instruction.
2482	#[stable(feature = "simd_x86", since = "1.27.0")]
2483	pub unsafe fn _mm256_set1_epi8(a: i8) -> __m256i {
2484	#[rustfmt::skip]
2485	_mm256_setr_epi8(
2486	e00:a, e01:a, e02:a, e03:a, e04:a, e05:a, e06:a, e07:a,
2487	e08:a, e09:a, e10:a, e11:a, e12:a, e13:a, e14:a, e15:a,
2488	e16:a, e17:a, e18:a, e19:a, e20:a, e21:a, e22:a, e23:a,
2489	e24:a, e25:a, e26:a, e27:a, e28:a, e29:a, e30:a, e31:a,
2490	)
2491	}
2492
2493	/// Broadcasts 16-bit integer `a` to all elements of returned vector.
2494	/// This intrinsic may generate the `vpbroadcastw`.
2495	///
2496	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi16)
2497	#[inline]
2498	#[target_feature(enable = "avx")]
2499	//#[cfg_attr(test, assert_instr(vpshufb))]
2500	#[cfg_attr(test, assert_instr(vinsertf128))]
2501	// This intrinsic has no corresponding instruction.
2502	#[stable(feature = "simd_x86", since = "1.27.0")]
2503	pub unsafe fn _mm256_set1_epi16(a: i16) -> __m256i {
2504	_mm256_setr_epi16(e00:a, e01:a, e02:a, e03:a, e04:a, e05:a, e06:a, e07:a, e08:a, e09:a, e10:a, e11:a, e12:a, e13:a, e14:a, e15:a)
2505	}
2506
2507	/// Broadcasts 32-bit integer `a` to all elements of returned vector.
2508	/// This intrinsic may generate the `vpbroadcastd`.
2509	///
2510	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi32)
2511	#[inline]
2512	#[target_feature(enable = "avx")]
2513	// This intrinsic has no corresponding instruction.
2514	#[stable(feature = "simd_x86", since = "1.27.0")]
2515	pub unsafe fn _mm256_set1_epi32(a: i32) -> __m256i {
2516	_mm256_setr_epi32(e0:a, e1:a, e2:a, e3:a, e4:a, e5:a, e6:a, e7:a)
2517	}
2518
2519	/// Broadcasts 64-bit integer `a` to all elements of returned vector.
2520	/// This intrinsic may generate the `vpbroadcastq`.
2521	///
2522	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi64x)
2523	#[inline]
2524	#[target_feature(enable = "avx")]
2525	#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(vinsertf128))]
2526	#[cfg_attr(all(test, target_arch = "x86"), assert_instr(vbroadcastsd))]
2527	// This intrinsic has no corresponding instruction.
2528	#[stable(feature = "simd_x86", since = "1.27.0")]
2529	pub unsafe fn _mm256_set1_epi64x(a: i64) -> __m256i {
2530	_mm256_setr_epi64x(a, b:a, c:a, d:a)
2531	}
2532
2533	/// Cast vector of type __m256d to type __m256.
2534	///
2535	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd_ps)
2536	#[inline]
2537	#[target_feature(enable = "avx")]
2538	// This intrinsic is only used for compilation and does not generate any
2539	// instructions, thus it has zero latency.
2540	#[stable(feature = "simd_x86", since = "1.27.0")]
2541	pub unsafe fn _mm256_castpd_ps(a: __m256d) -> __m256 {
2542	transmute(src:a)
2543	}
2544
2545	/// Cast vector of type __m256 to type __m256d.
2546	///
2547	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps_pd)
2548	#[inline]
2549	#[target_feature(enable = "avx")]
2550	// This intrinsic is only used for compilation and does not generate any
2551	// instructions, thus it has zero latency.
2552	#[stable(feature = "simd_x86", since = "1.27.0")]
2553	pub unsafe fn _mm256_castps_pd(a: __m256) -> __m256d {
2554	transmute(src:a)
2555	}
2556
2557	/// Casts vector of type __m256 to type __m256i.
2558	///
2559	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps_si256)
2560	#[inline]
2561	#[target_feature(enable = "avx")]
2562	// This intrinsic is only used for compilation and does not generate any
2563	// instructions, thus it has zero latency.
2564	#[stable(feature = "simd_x86", since = "1.27.0")]
2565	pub unsafe fn _mm256_castps_si256(a: __m256) -> __m256i {
2566	transmute(src:a)
2567	}
2568
2569	/// Casts vector of type __m256i to type __m256.
2570	///
2571	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_ps)
2572	#[inline]
2573	#[target_feature(enable = "avx")]
2574	// This intrinsic is only used for compilation and does not generate any
2575	// instructions, thus it has zero latency.
2576	#[stable(feature = "simd_x86", since = "1.27.0")]
2577	pub unsafe fn _mm256_castsi256_ps(a: __m256i) -> __m256 {
2578	transmute(src:a)
2579	}
2580
2581	/// Casts vector of type __m256d to type __m256i.
2582	///
2583	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd_si256)
2584	#[inline]
2585	#[target_feature(enable = "avx")]
2586	// This intrinsic is only used for compilation and does not generate any
2587	// instructions, thus it has zero latency.
2588	#[stable(feature = "simd_x86", since = "1.27.0")]
2589	pub unsafe fn _mm256_castpd_si256(a: __m256d) -> __m256i {
2590	transmute(src:a)
2591	}
2592
2593	/// Casts vector of type __m256i to type __m256d.
2594	///
2595	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_pd)
2596	#[inline]
2597	#[target_feature(enable = "avx")]
2598	// This intrinsic is only used for compilation and does not generate any
2599	// instructions, thus it has zero latency.
2600	#[stable(feature = "simd_x86", since = "1.27.0")]
2601	pub unsafe fn _mm256_castsi256_pd(a: __m256i) -> __m256d {
2602	transmute(src:a)
2603	}
2604
2605	/// Casts vector of type __m256 to type __m128.
2606	///
2607	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps256_ps128)
2608	#[inline]
2609	#[target_feature(enable = "avx")]
2610	// This intrinsic is only used for compilation and does not generate any
2611	// instructions, thus it has zero latency.
2612	#[stable(feature = "simd_x86", since = "1.27.0")]
2613	pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 {
2614	simd_shuffle!(a, a, [`0`, `1`, `2`, `3`])
2615	}
2616
2617	/// Casts vector of type __m256d to type __m128d.
2618	///
2619	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd256_pd128)
2620	#[inline]
2621	#[target_feature(enable = "avx")]
2622	// This intrinsic is only used for compilation and does not generate any
2623	// instructions, thus it has zero latency.
2624	#[stable(feature = "simd_x86", since = "1.27.0")]
2625	pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
2626	simd_shuffle!(a, a, [`0`, `1`])
2627	}
2628
2629	/// Casts vector of type __m256i to type __m128i.
2630	///
2631	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_si128)
2632	#[inline]
2633	#[target_feature(enable = "avx")]
2634	// This intrinsic is only used for compilation and does not generate any
2635	// instructions, thus it has zero latency.
2636	#[stable(feature = "simd_x86", since = "1.27.0")]
2637	pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
2638	let a: i64x4 = a.as_i64x4();
2639	let dst: i64x2 = simd_shuffle!(a, a, [`0`, `1`]);
2640	transmute(src:dst)
2641	}
2642
2643	/// Casts vector of type __m128 to type __m256;
2644	/// the upper 128 bits of the result are undefined.
2645	///
2646	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps128_ps256)
2647	#[inline]
2648	#[target_feature(enable = "avx")]
2649	// This intrinsic is only used for compilation and does not generate any
2650	// instructions, thus it has zero latency.
2651	#[stable(feature = "simd_x86", since = "1.27.0")]
2652	pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
2653	// FIXME simd_shuffle!(a, a, [0, 1, 2, 3, -1, -1, -1, -1])
2654	simd_shuffle!(a, a, [`0`, `1`, `2`, `3`, `0`, `0`, `0`, `0`])
2655	}
2656
2657	/// Casts vector of type __m128d to type __m256d;
2658	/// the upper 128 bits of the result are undefined.
2659	///
2660	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd128_pd256)
2661	#[inline]
2662	#[target_feature(enable = "avx")]
2663	// This intrinsic is only used for compilation and does not generate any
2664	// instructions, thus it has zero latency.
2665	#[stable(feature = "simd_x86", since = "1.27.0")]
2666	pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
2667	// FIXME simd_shuffle!(a, a, [0, 1, -1, -1])
2668	simd_shuffle!(a, a, [`0`, `1`, `0`, `0`])
2669	}
2670
2671	/// Casts vector of type __m128i to type __m256i;
2672	/// the upper 128 bits of the result are undefined.
2673	///
2674	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi128_si256)
2675	#[inline]
2676	#[target_feature(enable = "avx")]
2677	// This intrinsic is only used for compilation and does not generate any
2678	// instructions, thus it has zero latency.
2679	#[stable(feature = "simd_x86", since = "1.27.0")]
2680	pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
2681	let a: i64x2 = a.as_i64x2();
2682	// FIXME simd_shuffle!(a, a, [0, 1, -1, -1])
2683	let dst: i64x4 = simd_shuffle!(a, a, [`0`, `1`, `0`, `0`]);
2684	transmute(src:dst)
2685	}
2686
2687	/// Constructs a 256-bit floating-point vector of `[8 x float]` from a
2688	/// 128-bit floating-point vector of `[4 x float]`. The lower 128 bits contain
2689	/// the value of the source vector. The upper 128 bits are set to zero.
2690	///
2691	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextps128_ps256)
2692	#[inline]
2693	#[target_feature(enable = "avx,sse")]
2694	// This intrinsic is only used for compilation and does not generate any
2695	// instructions, thus it has zero latency.
2696	#[stable(feature = "simd_x86", since = "1.27.0")]
2697	pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
2698	simd_shuffle!(a, _mm_setzero_ps(), [`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`])
2699	}
2700
2701	/// Constructs a 256-bit integer vector from a 128-bit integer vector.
2702	/// The lower 128 bits contain the value of the source vector. The upper
2703	/// 128 bits are set to zero.
2704	///
2705	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextsi128_si256)
2706	#[inline]
2707	#[target_feature(enable = "avx,sse2")]
2708	// This intrinsic is only used for compilation and does not generate any
2709	// instructions, thus it has zero latency.
2710	#[stable(feature = "simd_x86", since = "1.27.0")]
2711	pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
2712	let b: i64x2 = _mm_setzero_si128().as_i64x2();
2713	let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [`0`, `1`, `2`, `3`]);
2714	transmute(src:dst)
2715	}
2716
2717	/// Constructs a 256-bit floating-point vector of `[4 x double]` from a
2718	/// 128-bit floating-point vector of `[2 x double]`. The lower 128 bits
2719	/// contain the value of the source vector. The upper 128 bits are set
2720	/// to zero.
2721	///
2722	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextpd128_pd256)
2723	#[inline]
2724	#[target_feature(enable = "avx,sse2")]
2725	// This intrinsic is only used for compilation and does not generate any
2726	// instructions, thus it has zero latency.
2727	#[stable(feature = "simd_x86", since = "1.27.0")]
2728	pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
2729	simd_shuffle!(a, _mm_setzero_pd(), [`0`, `1`, `2`, `3`])
2730	}
2731
2732	/// Returns vector of type `__m256` with indeterminate elements.
2733	/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
2734	/// In practice, this is equivalent to [`mem::zeroed`].
2735	///
2736	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_ps)
2737	#[inline]
2738	#[target_feature(enable = "avx")]
2739	// This intrinsic has no corresponding instruction.
2740	#[stable(feature = "simd_x86", since = "1.27.0")]
2741	pub unsafe fn _mm256_undefined_ps() -> __m256 {
2742	_mm256_set1_ps(`0.0`)
2743	}
2744
2745	/// Returns vector of type `__m256d` with indeterminate elements.
2746	/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
2747	/// In practice, this is equivalent to [`mem::zeroed`].
2748	///
2749	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_pd)
2750	#[inline]
2751	#[target_feature(enable = "avx")]
2752	// This intrinsic has no corresponding instruction.
2753	#[stable(feature = "simd_x86", since = "1.27.0")]
2754	pub unsafe fn _mm256_undefined_pd() -> __m256d {
2755	_mm256_set1_pd(`0.0`)
2756	}
2757
2758	/// Returns vector of type __m256i with with indeterminate elements.
2759	/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
2760	/// In practice, this is equivalent to [`mem::zeroed`].
2761	///
2762	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_si256)
2763	#[inline]
2764	#[target_feature(enable = "avx")]
2765	// This intrinsic has no corresponding instruction.
2766	#[stable(feature = "simd_x86", since = "1.27.0")]
2767	pub unsafe fn _mm256_undefined_si256() -> __m256i {
2768	__m256i(`0`, `0`, `0`, `0`)
2769	}
2770
2771	/// Sets packed __m256 returned vector with the supplied values.
2772	///
2773	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128)
2774	#[inline]
2775	#[target_feature(enable = "avx")]
2776	#[cfg_attr(test, assert_instr(vinsertf128))]
2777	#[stable(feature = "simd_x86", since = "1.27.0")]
2778	pub unsafe fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 {
2779	simd_shuffle!(lo, hi, [`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`])
2780	}
2781
2782	/// Sets packed __m256d returned vector with the supplied values.
2783	///
2784	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128d)
2785	#[inline]
2786	#[target_feature(enable = "avx")]
2787	#[cfg_attr(test, assert_instr(vinsertf128))]
2788	#[stable(feature = "simd_x86", since = "1.27.0")]
2789	pub unsafe fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d {
2790	let hi: __m128 = transmute(src:hi);
2791	let lo: __m128 = transmute(src:lo);
2792	transmute(src:_mm256_set_m128(hi, lo))
2793	}
2794
2795	/// Sets packed __m256i returned vector with the supplied values.
2796	///
2797	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128i)
2798	#[inline]
2799	#[target_feature(enable = "avx")]
2800	#[cfg_attr(test, assert_instr(vinsertf128))]
2801	#[stable(feature = "simd_x86", since = "1.27.0")]
2802	pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
2803	let hi: __m128 = transmute(src:hi);
2804	let lo: __m128 = transmute(src:lo);
2805	transmute(src:_mm256_set_m128(hi, lo))
2806	}
2807
2808	/// Sets packed __m256 returned vector with the supplied values.
2809	///
2810	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128)
2811	#[inline]
2812	#[target_feature(enable = "avx")]
2813	#[cfg_attr(test, assert_instr(vinsertf128))]
2814	#[stable(feature = "simd_x86", since = "1.27.0")]
2815	pub unsafe fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 {
2816	_mm256_set_m128(hi, lo)
2817	}
2818
2819	/// Sets packed __m256d returned vector with the supplied values.
2820	///
2821	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128d)
2822	#[inline]
2823	#[target_feature(enable = "avx")]
2824	#[cfg_attr(test, assert_instr(vinsertf128))]
2825	#[stable(feature = "simd_x86", since = "1.27.0")]
2826	pub unsafe fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d {
2827	_mm256_set_m128d(hi, lo)
2828	}
2829
2830	/// Sets packed __m256i returned vector with the supplied values.
2831	///
2832	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128i)
2833	#[inline]
2834	#[target_feature(enable = "avx")]
2835	#[cfg_attr(test, assert_instr(vinsertf128))]
2836	#[stable(feature = "simd_x86", since = "1.27.0")]
2837	pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
2838	_mm256_set_m128i(hi, lo)
2839	}
2840
2841	/// Loads two 128-bit values (composed of 4 packed single-precision (32-bit)
2842	/// floating-point elements) from memory, and combine them into a 256-bit
2843	/// value.
2844	/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2845	///
2846	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128)
2847	#[inline]
2848	#[target_feature(enable = "avx,sse")]
2849	// This intrinsic has no corresponding instruction.
2850	#[stable(feature = "simd_x86", since = "1.27.0")]
2851	pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m256 {
2852	let a: __m256 = _mm256_castps128_ps256(_mm_loadu_ps(loaddr));
2853	_mm256_insertf128_ps::<`1`>(a, b:_mm_loadu_ps(hiaddr))
2854	}
2855
2856	/// Loads two 128-bit values (composed of 2 packed double-precision (64-bit)
2857	/// floating-point elements) from memory, and combine them into a 256-bit
2858	/// value.
2859	/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2860	///
2861	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128d)
2862	#[inline]
2863	#[target_feature(enable = "avx,sse2")]
2864	// This intrinsic has no corresponding instruction.
2865	#[stable(feature = "simd_x86", since = "1.27.0")]
2866	pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m256d {
2867	let a: __m256d = _mm256_castpd128_pd256(_mm_loadu_pd(mem_addr:loaddr));
2868	_mm256_insertf128_pd::<`1`>(a, b:_mm_loadu_pd(mem_addr:hiaddr))
2869	}
2870
2871	/// Loads two 128-bit values (composed of integer data) from memory, and combine
2872	/// them into a 256-bit value.
2873	/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2874	///
2875	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128i)
2876	#[inline]
2877	#[target_feature(enable = "avx,sse2")]
2878	// This intrinsic has no corresponding instruction.
2879	#[stable(feature = "simd_x86", since = "1.27.0")]
2880	pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i {
2881	let a: __m256i = _mm256_castsi128_si256(_mm_loadu_si128(mem_addr:loaddr));
2882	_mm256_insertf128_si256::<`1`>(a, b:_mm_loadu_si128(mem_addr:hiaddr))
2883	}
2884
2885	/// Stores the high and low 128-bit halves (each composed of 4 packed
2886	/// single-precision (32-bit) floating-point elements) from `a` into memory two
2887	/// different 128-bit locations.
2888	/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2889	///
2890	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128)
2891	#[inline]
2892	#[target_feature(enable = "avx,sse")]
2893	// This intrinsic has no corresponding instruction.
2894	#[stable(feature = "simd_x86", since = "1.27.0")]
2895	pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256) {
2896	let lo: __m128 = _mm256_castps256_ps128(a);
2897	_mm_storeu_ps(p:loaddr, a:lo);
2898	let hi: __m128 = _mm256_extractf128_ps::<`1`>(a);
2899	_mm_storeu_ps(p:hiaddr, a:hi);
2900	}
2901
2902	/// Stores the high and low 128-bit halves (each composed of 2 packed
2903	/// double-precision (64-bit) floating-point elements) from `a` into memory two
2904	/// different 128-bit locations.
2905	/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2906	///
2907	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128d)
2908	#[inline]
2909	#[target_feature(enable = "avx,sse2")]
2910	// This intrinsic has no corresponding instruction.
2911	#[stable(feature = "simd_x86", since = "1.27.0")]
2912	pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256d) {
2913	let lo: __m128d = _mm256_castpd256_pd128(a);
2914	_mm_storeu_pd(mem_addr:loaddr, a:lo);
2915	let hi: __m128d = _mm256_extractf128_pd::<`1`>(a);
2916	_mm_storeu_pd(mem_addr:hiaddr, a:hi);
2917	}
2918
2919	/// Stores the high and low 128-bit halves (each composed of integer data) from
2920	/// `a` into memory two different 128-bit locations.
2921	/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2922	///
2923	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128i)
2924	#[inline]
2925	#[target_feature(enable = "avx,sse2")]
2926	// This intrinsic has no corresponding instruction.
2927	#[stable(feature = "simd_x86", since = "1.27.0")]
2928	pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i) {
2929	let lo: __m128i = _mm256_castsi256_si128(a);
2930	_mm_storeu_si128(mem_addr:loaddr, a:lo);
2931	let hi: __m128i = _mm256_extractf128_si256::<`1`>(a);
2932	_mm_storeu_si128(mem_addr:hiaddr, a:hi);
2933	}
2934
2935	/// Returns the first element of the input vector of `[8 x float]`.
2936	///
2937	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtss_f32)
2938	#[inline]
2939	#[target_feature(enable = "avx")]
2940	//#[cfg_attr(test, assert_instr(movss))] FIXME
2941	#[stable(feature = "simd_x86", since = "1.27.0")]
2942	pub unsafe fn _mm256_cvtss_f32(a: __m256) -> f32 {
2943	simd_extract!(a, `0`)
2944	}
2945
// LLVM intrinsics used in the above functions
//
// Each declaration binds a Rust-callable name to the LLVM intrinsic named in
// its `link_name`. The groups below follow the intrinsic names.
#[allow(improper_ctypes)]
extern "C" {
    // Rounding and square root.
    #[link_name = "llvm.x86.avx.round.pd.256"]
    fn roundpd256(a: __m256d, b: i32) -> __m256d;
    #[link_name = "llvm.x86.avx.round.ps.256"]
    fn roundps256(a: __m256, b: i32) -> __m256;
    #[link_name = "llvm.x86.avx.sqrt.ps.256"]
    fn sqrtps256(a: __m256) -> __m256;

    // Dot product and horizontal add/subtract.
    #[link_name = "llvm.x86.avx.dp.ps.256"]
    fn vdpps(a: __m256, b: __m256, imm8: i32) -> __m256;
    #[link_name = "llvm.x86.avx.hadd.pd.256"]
    fn vhaddpd(a: __m256d, b: __m256d) -> __m256d;
    #[link_name = "llvm.x86.avx.hadd.ps.256"]
    fn vhaddps(a: __m256, b: __m256) -> __m256;
    #[link_name = "llvm.x86.avx.hsub.pd.256"]
    fn vhsubpd(a: __m256d, b: __m256d) -> __m256d;
    #[link_name = "llvm.x86.avx.hsub.ps.256"]
    fn vhsubps(a: __m256, b: __m256) -> __m256;

    // Comparisons. The 128-bit forms bind to the SSE/SSE2 intrinsics.
    // NOTE(review): the 128-bit bindings declare `imm8: i8` while the 256-bit
    // bindings declare `imm8: u8` — confirm the difference is intentional.
    #[link_name = "llvm.x86.sse2.cmp.pd"]
    fn vcmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.avx.cmp.pd.256"]
    fn vcmppd256(a: __m256d, b: __m256d, imm8: u8) -> __m256d;
    #[link_name = "llvm.x86.sse.cmp.ps"]
    fn vcmpps(a: __m128, b: __m128, imm8: i8) -> __m128;
    #[link_name = "llvm.x86.avx.cmp.ps.256"]
    fn vcmpps256(a: __m256, b: __m256, imm8: u8) -> __m256;
    #[link_name = "llvm.x86.sse2.cmp.sd"]
    fn vcmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse.cmp.ss"]
    fn vcmpss(a: __m128, b: __m128, imm8: i8) -> __m128;

    // Floating-point to 32-bit integer conversions (`cvt` and `cvtt` forms).
    #[link_name = "llvm.x86.avx.cvt.ps2dq.256"]
    fn vcvtps2dq(a: __m256) -> i32x8;
    #[link_name = "llvm.x86.avx.cvtt.pd2dq.256"]
    fn vcvttpd2dq(a: __m256d) -> i32x4;
    #[link_name = "llvm.x86.avx.cvt.pd2dq.256"]
    fn vcvtpd2dq(a: __m256d) -> i32x4;
    #[link_name = "llvm.x86.avx.cvtt.ps2dq.256"]
    fn vcvttps2dq(a: __m256) -> i32x8;

    // Register zeroing.
    #[link_name = "llvm.x86.avx.vzeroall"]
    fn vzeroall();
    #[link_name = "llvm.x86.avx.vzeroupper"]
    fn vzeroupper();

    // Permutes: variable-control (`vpermilvar`) and 128-bit lane (`vperm2f128`).
    #[link_name = "llvm.x86.avx.vpermilvar.ps.256"]
    fn vpermilps256(a: __m256, b: i32x8) -> __m256;
    #[link_name = "llvm.x86.avx.vpermilvar.ps"]
    fn vpermilps(a: __m128, b: i32x4) -> __m128;
    #[link_name = "llvm.x86.avx.vpermilvar.pd.256"]
    fn vpermilpd256(a: __m256d, b: i64x4) -> __m256d;
    #[link_name = "llvm.x86.avx.vpermilvar.pd"]
    fn vpermilpd(a: __m128d, b: i64x2) -> __m128d;
    #[link_name = "llvm.x86.avx.vperm2f128.ps.256"]
    fn vperm2f128ps256(a: __m256, b: __m256, imm8: i8) -> __m256;
    #[link_name = "llvm.x86.avx.vperm2f128.pd.256"]
    fn vperm2f128pd256(a: __m256d, b: __m256d, imm8: i8) -> __m256d;
    #[link_name = "llvm.x86.avx.vperm2f128.si.256"]
    fn vperm2f128si256(a: i32x8, b: i32x8, imm8: i8) -> i32x8;

    // Masked loads and stores; the address is passed as a byte pointer.
    #[link_name = "llvm.x86.avx.maskload.pd.256"]
    fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d;
    #[link_name = "llvm.x86.avx.maskstore.pd.256"]
    fn maskstorepd256(mem_addr: *mut i8, mask: i64x4, a: __m256d);
    #[link_name = "llvm.x86.avx.maskload.pd"]
    fn maskloadpd(mem_addr: *const i8, mask: i64x2) -> __m128d;
    #[link_name = "llvm.x86.avx.maskstore.pd"]
    fn maskstorepd(mem_addr: *mut i8, mask: i64x2, a: __m128d);
    #[link_name = "llvm.x86.avx.maskload.ps.256"]
    fn maskloadps256(mem_addr: *const i8, mask: i32x8) -> __m256;
    #[link_name = "llvm.x86.avx.maskstore.ps.256"]
    fn maskstoreps256(mem_addr: *mut i8, mask: i32x8, a: __m256);
    #[link_name = "llvm.x86.avx.maskload.ps"]
    fn maskloadps(mem_addr: *const i8, mask: i32x4) -> __m128;
    #[link_name = "llvm.x86.avx.maskstore.ps"]
    fn maskstoreps(mem_addr: *mut i8, mask: i32x4, a: __m128);

    // 256-bit integer load (`ldu.dq`).
    #[link_name = "llvm.x86.avx.ldu.dq.256"]
    fn vlddqu(mem_addr: *const i8) -> i8x32;

    // Reciprocal and reciprocal square root.
    #[link_name = "llvm.x86.avx.rcp.ps.256"]
    fn vrcpps(a: __m256) -> __m256;
    #[link_name = "llvm.x86.avx.rsqrt.ps.256"]
    fn vrsqrtps(a: __m256) -> __m256;

    // Test intrinsics (`z`, `c` and `nzc` variants), each returning an `i32`.
    #[link_name = "llvm.x86.avx.ptestz.256"]
    fn ptestz256(a: i64x4, b: i64x4) -> i32;
    #[link_name = "llvm.x86.avx.ptestc.256"]
    fn ptestc256(a: i64x4, b: i64x4) -> i32;
    #[link_name = "llvm.x86.avx.ptestnzc.256"]
    fn ptestnzc256(a: i64x4, b: i64x4) -> i32;
    #[link_name = "llvm.x86.avx.vtestz.pd.256"]
    fn vtestzpd256(a: __m256d, b: __m256d) -> i32;
    #[link_name = "llvm.x86.avx.vtestc.pd.256"]
    fn vtestcpd256(a: __m256d, b: __m256d) -> i32;
    #[link_name = "llvm.x86.avx.vtestnzc.pd.256"]
    fn vtestnzcpd256(a: __m256d, b: __m256d) -> i32;
    #[link_name = "llvm.x86.avx.vtestz.pd"]
    fn vtestzpd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.avx.vtestc.pd"]
    fn vtestcpd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.avx.vtestnzc.pd"]
    fn vtestnzcpd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.avx.vtestz.ps.256"]
    fn vtestzps256(a: __m256, b: __m256) -> i32;
    #[link_name = "llvm.x86.avx.vtestc.ps.256"]
    fn vtestcps256(a: __m256, b: __m256) -> i32;
    #[link_name = "llvm.x86.avx.vtestnzc.ps.256"]
    fn vtestnzcps256(a: __m256, b: __m256) -> i32;
    #[link_name = "llvm.x86.avx.vtestz.ps"]
    fn vtestzps(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.avx.vtestc.ps"]
    fn vtestcps(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.avx.vtestnzc.ps"]
    fn vtestnzcps(a: __m128, b: __m128) -> i32;

    // Packed minimum and maximum.
    #[link_name = "llvm.x86.avx.min.ps.256"]
    fn vminps(a: __m256, b: __m256) -> __m256;
    #[link_name = "llvm.x86.avx.max.ps.256"]
    fn vmaxps(a: __m256, b: __m256) -> __m256;
    #[link_name = "llvm.x86.avx.min.pd.256"]
    fn vminpd(a: __m256d, b: __m256d) -> __m256d;
    #[link_name = "llvm.x86.avx.max.pd.256"]
    fn vmaxpd(a: __m256d, b: __m256d) -> __m256d;
}
3064
3065	#[cfg(test)]
3066	mod tests {
3067	use crate::hint::black_box;
3068	use crate::ptr;
3069	use stdarch_test::simd_test;
3070
3071	use crate::core_arch::x86::*;
3072
3073	#[simd_test(enable = "avx")]
3074	unsafe fn test_mm256_add_pd() {
3075	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3076	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
3077	let r = _mm256_add_pd(a, b);
3078	let e = _mm256_setr_pd(`6.`, `8.`, `10.`, `12.`);
3079	assert_eq_m256d(r, e);
3080	}
3081
3082	#[simd_test(enable = "avx")]
3083	unsafe fn test_mm256_add_ps() {
3084	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
3085	let b = _mm256_setr_ps(`9.`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
3086	let r = _mm256_add_ps(a, b);
3087	let e = _mm256_setr_ps(`10.`, `12.`, `14.`, `16.`, `18.`, `20.`, `22.`, `24.`);
3088	assert_eq_m256(r, e);
3089	}
3090
3091	#[simd_test(enable = "avx")]
3092	unsafe fn test_mm256_and_pd() {
3093	let a = _mm256_set1_pd(`1.`);
3094	let b = _mm256_set1_pd(`0.6`);
3095	let r = _mm256_and_pd(a, b);
3096	let e = _mm256_set1_pd(`0.5`);
3097	assert_eq_m256d(r, e);
3098	}
3099
3100	#[simd_test(enable = "avx")]
3101	unsafe fn test_mm256_and_ps() {
3102	let a = _mm256_set1_ps(`1.`);
3103	let b = _mm256_set1_ps(`0.6`);
3104	let r = _mm256_and_ps(a, b);
3105	let e = _mm256_set1_ps(`0.5`);
3106	assert_eq_m256(r, e);
3107	}
3108
3109	#[simd_test(enable = "avx")]
3110	unsafe fn test_mm256_or_pd() {
3111	let a = _mm256_set1_pd(`1.`);
3112	let b = _mm256_set1_pd(`0.6`);
3113	let r = _mm256_or_pd(a, b);
3114	let e = _mm256_set1_pd(`1.2`);
3115	assert_eq_m256d(r, e);
3116	}
3117
3118	#[simd_test(enable = "avx")]
3119	unsafe fn test_mm256_or_ps() {
3120	let a = _mm256_set1_ps(`1.`);
3121	let b = _mm256_set1_ps(`0.6`);
3122	let r = _mm256_or_ps(a, b);
3123	let e = _mm256_set1_ps(`1.2`);
3124	assert_eq_m256(r, e);
3125	}
3126
3127	#[simd_test(enable = "avx")]
3128	unsafe fn test_mm256_shuffle_pd() {
3129	let a = _mm256_setr_pd(`1.`, `4.`, `5.`, `8.`);
3130	let b = _mm256_setr_pd(`2.`, `3.`, `6.`, `7.`);
3131	let r = _mm256_shuffle_pd::<`0b11_11_11_11`>(a, b);
3132	let e = _mm256_setr_pd(`4.`, `3.`, `8.`, `7.`);
3133	assert_eq_m256d(r, e);
3134	}
3135
3136	#[simd_test(enable = "avx")]
3137	unsafe fn test_mm256_shuffle_ps() {
3138	let a = _mm256_setr_ps(`1.`, `4.`, `5.`, `8.`, `9.`, `12.`, `13.`, `16.`);
3139	let b = _mm256_setr_ps(`2.`, `3.`, `6.`, `7.`, `10.`, `11.`, `14.`, `15.`);
3140	let r = _mm256_shuffle_ps::<`0b00_00_11_11`>(a, b);
3141	let e = _mm256_setr_ps(`8.`, `8.`, `2.`, `2.`, `16.`, `16.`, `10.`, `10.`);
3142	assert_eq_m256(r, e);
3143	}
3144
3145	#[simd_test(enable = "avx")]
3146	unsafe fn test_mm256_andnot_pd() {
3147	let a = _mm256_set1_pd(`0.`);
3148	let b = _mm256_set1_pd(`0.6`);
3149	let r = _mm256_andnot_pd(a, b);
3150	assert_eq_m256d(r, b);
3151	}
3152
3153	#[simd_test(enable = "avx")]
3154	unsafe fn test_mm256_andnot_ps() {
3155	let a = _mm256_set1_ps(`0.`);
3156	let b = _mm256_set1_ps(`0.6`);
3157	let r = _mm256_andnot_ps(a, b);
3158	assert_eq_m256(r, b);
3159	}
3160
3161	#[simd_test(enable = "avx")]
3162	unsafe fn test_mm256_max_pd() {
3163	let a = _mm256_setr_pd(`1.`, `4.`, `5.`, `8.`);
3164	let b = _mm256_setr_pd(`2.`, `3.`, `6.`, `7.`);
3165	let r = _mm256_max_pd(a, b);
3166	let e = _mm256_setr_pd(`2.`, `4.`, `6.`, `8.`);
3167	assert_eq_m256d(r, e);
3168	// > If the values being compared are both 0.0s (of either sign), the
3169	// > value in the second operand (source operand) is returned.
3170	let w = _mm256_max_pd(_mm256_set1_pd(`0.0`), _mm256_set1_pd(`-0.0`));
3171	let x = _mm256_max_pd(_mm256_set1_pd(`-0.0`), _mm256_set1_pd(`0.0`));
3172	let wu: [u64; `4`] = transmute(w);
3173	let xu: [u64; `4`] = transmute(x);
3174	assert_eq!(wu, [`0x8000_0000_0000_0000u64`; `4`]);
3175	assert_eq!(xu, [`0u64`; `4`]);
3176	// > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3177	// > second operand (source operand), either a NaN or a valid
3178	// > floating-point value, is written to the result.
3179	let y = _mm256_max_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(`0.0`));
3180	let z = _mm256_max_pd(_mm256_set1_pd(`0.0`), _mm256_set1_pd(f64::NAN));
3181	let yf: [f64; `4`] = transmute(y);
3182	let zf: [f64; `4`] = transmute(z);
3183	assert_eq!(yf, [`0.0`; `4`]);
3184	assert!(zf.iter().all(\|f\| f.is_nan()), "{:?}", zf);
3185	}
3186
3187	#[simd_test(enable = "avx")]
3188	unsafe fn test_mm256_max_ps() {
3189	let a = _mm256_setr_ps(`1.`, `4.`, `5.`, `8.`, `9.`, `12.`, `13.`, `16.`);
3190	let b = _mm256_setr_ps(`2.`, `3.`, `6.`, `7.`, `10.`, `11.`, `14.`, `15.`);
3191	let r = _mm256_max_ps(a, b);
3192	let e = _mm256_setr_ps(`2.`, `4.`, `6.`, `8.`, `10.`, `12.`, `14.`, `16.`);
3193	assert_eq_m256(r, e);
3194	// > If the values being compared are both 0.0s (of either sign), the
3195	// > value in the second operand (source operand) is returned.
3196	let w = _mm256_max_ps(_mm256_set1_ps(`0.0`), _mm256_set1_ps(`-0.0`));
3197	let x = _mm256_max_ps(_mm256_set1_ps(`-0.0`), _mm256_set1_ps(`0.0`));
3198	let wu: [u32; `8`] = transmute(w);
3199	let xu: [u32; `8`] = transmute(x);
3200	assert_eq!(wu, [`0x8000_0000u32`; `8`]);
3201	assert_eq!(xu, [`0u32`; `8`]);
3202	// > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3203	// > second operand (source operand), either a NaN or a valid
3204	// > floating-point value, is written to the result.
3205	let y = _mm256_max_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(`0.0`));
3206	let z = _mm256_max_ps(_mm256_set1_ps(`0.0`), _mm256_set1_ps(f32::NAN));
3207	let yf: [f32; `8`] = transmute(y);
3208	let zf: [f32; `8`] = transmute(z);
3209	assert_eq!(yf, [`0.0`; `8`]);
3210	assert!(zf.iter().all(\|f\| f.is_nan()), "{:?}", zf);
3211	}
3212
3213	#[simd_test(enable = "avx")]
3214	unsafe fn test_mm256_min_pd() {
3215	let a = _mm256_setr_pd(`1.`, `4.`, `5.`, `8.`);
3216	let b = _mm256_setr_pd(`2.`, `3.`, `6.`, `7.`);
3217	let r = _mm256_min_pd(a, b);
3218	let e = _mm256_setr_pd(`1.`, `3.`, `5.`, `7.`);
3219	assert_eq_m256d(r, e);
3220	// > If the values being compared are both 0.0s (of either sign), the
3221	// > value in the second operand (source operand) is returned.
3222	let w = _mm256_min_pd(_mm256_set1_pd(`0.0`), _mm256_set1_pd(`-0.0`));
3223	let x = _mm256_min_pd(_mm256_set1_pd(`-0.0`), _mm256_set1_pd(`0.0`));
3224	let wu: [u64; `4`] = transmute(w);
3225	let xu: [u64; `4`] = transmute(x);
3226	assert_eq!(wu, [`0x8000_0000_0000_0000u64`; `4`]);
3227	assert_eq!(xu, [`0u64`; `4`]);
3228	// > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3229	// > second operand (source operand), either a NaN or a valid
3230	// > floating-point value, is written to the result.
3231	let y = _mm256_min_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(`0.0`));
3232	let z = _mm256_min_pd(_mm256_set1_pd(`0.0`), _mm256_set1_pd(f64::NAN));
3233	let yf: [f64; `4`] = transmute(y);
3234	let zf: [f64; `4`] = transmute(z);
3235	assert_eq!(yf, [`0.0`; `4`]);
3236	assert!(zf.iter().all(\|f\| f.is_nan()), "{:?}", zf);
3237	}
3238
3239	#[simd_test(enable = "avx")]
3240	unsafe fn test_mm256_min_ps() {
3241	let a = _mm256_setr_ps(`1.`, `4.`, `5.`, `8.`, `9.`, `12.`, `13.`, `16.`);
3242	let b = _mm256_setr_ps(`2.`, `3.`, `6.`, `7.`, `10.`, `11.`, `14.`, `15.`);
3243	let r = _mm256_min_ps(a, b);
3244	let e = _mm256_setr_ps(`1.`, `3.`, `5.`, `7.`, `9.`, `11.`, `13.`, `15.`);
3245	assert_eq_m256(r, e);
3246	// > If the values being compared are both 0.0s (of either sign), the
3247	// > value in the second operand (source operand) is returned.
3248	let w = _mm256_min_ps(_mm256_set1_ps(`0.0`), _mm256_set1_ps(`-0.0`));
3249	let x = _mm256_min_ps(_mm256_set1_ps(`-0.0`), _mm256_set1_ps(`0.0`));
3250	let wu: [u32; `8`] = transmute(w);
3251	let xu: [u32; `8`] = transmute(x);
3252	assert_eq!(wu, [`0x8000_0000u32`; `8`]);
3253	assert_eq!(xu, [`0u32`; `8`]);
3254	// > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3255	// > second operand (source operand), either a NaN or a valid
3256	// > floating-point value, is written to the result.
3257	let y = _mm256_min_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(`0.0`));
3258	let z = _mm256_min_ps(_mm256_set1_ps(`0.0`), _mm256_set1_ps(f32::NAN));
3259	let yf: [f32; `8`] = transmute(y);
3260	let zf: [f32; `8`] = transmute(z);
3261	assert_eq!(yf, [`0.0`; `8`]);
3262	assert!(zf.iter().all(\|f\| f.is_nan()), "{:?}", zf);
3263	}
3264
3265	#[simd_test(enable = "avx")]
3266	unsafe fn test_mm256_mul_pd() {
3267	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3268	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
3269	let r = _mm256_mul_pd(a, b);
3270	let e = _mm256_setr_pd(`5.`, `12.`, `21.`, `32.`);
3271	assert_eq_m256d(r, e);
3272	}
3273
3274	#[simd_test(enable = "avx")]
3275	unsafe fn test_mm256_mul_ps() {
3276	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
3277	let b = _mm256_setr_ps(`9.`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
3278	let r = _mm256_mul_ps(a, b);
3279	let e = _mm256_setr_ps(`9.`, `20.`, `33.`, `48.`, `65.`, `84.`, `105.`, `128.`);
3280	assert_eq_m256(r, e);
3281	}
3282
3283	#[simd_test(enable = "avx")]
3284	unsafe fn test_mm256_addsub_pd() {
3285	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3286	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
3287	let r = _mm256_addsub_pd(a, b);
3288	let e = _mm256_setr_pd(`-4.`, `8.`, `-4.`, `12.`);
3289	assert_eq_m256d(r, e);
3290	}
3291
3292	#[simd_test(enable = "avx")]
3293	unsafe fn test_mm256_addsub_ps() {
3294	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `1.`, `2.`, `3.`, `4.`);
3295	let b = _mm256_setr_ps(`5.`, `6.`, `7.`, `8.`, `5.`, `6.`, `7.`, `8.`);
3296	let r = _mm256_addsub_ps(a, b);
3297	let e = _mm256_setr_ps(`-4.`, `8.`, `-4.`, `12.`, `-4.`, `8.`, `-4.`, `12.`);
3298	assert_eq_m256(r, e);
3299	}
3300
3301	#[simd_test(enable = "avx")]
3302	unsafe fn test_mm256_sub_pd() {
3303	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3304	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
3305	let r = _mm256_sub_pd(a, b);
3306	let e = _mm256_setr_pd(`-4.`, `-4.`, `-4.`, `-4.`);
3307	assert_eq_m256d(r, e);
3308	}
3309
3310	#[simd_test(enable = "avx")]
3311	unsafe fn test_mm256_sub_ps() {
3312	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `-1.`, `-2.`, `-3.`, `-4.`);
3313	let b = _mm256_setr_ps(`5.`, `6.`, `7.`, `8.`, `3.`, `2.`, `1.`, `0.`);
3314	let r = _mm256_sub_ps(a, b);
3315	let e = _mm256_setr_ps(`-4.`, `-4.`, `-4.`, `-4.`, `-4.`, `-4.`, `-4.`, `-4.`);
3316	assert_eq_m256(r, e);
3317	}
3318
3319	#[simd_test(enable = "avx")]
3320	unsafe fn test_mm256_round_pd() {
3321	let a = _mm256_setr_pd(`1.55`, `2.2`, `3.99`, `-1.2`);
3322	let result_closest = _mm256_round_pd::<`0b0000`>(a);
3323	let result_down = _mm256_round_pd::<`0b0001`>(a);
3324	let result_up = _mm256_round_pd::<`0b0010`>(a);
3325	let expected_closest = _mm256_setr_pd(`2.`, `2.`, `4.`, `-1.`);
3326	let expected_down = _mm256_setr_pd(`1.`, `2.`, `3.`, `-2.`);
3327	let expected_up = _mm256_setr_pd(`2.`, `3.`, `4.`, `-1.`);
3328	assert_eq_m256d(result_closest, expected_closest);
3329	assert_eq_m256d(result_down, expected_down);
3330	assert_eq_m256d(result_up, expected_up);
3331	}
3332
3333	#[simd_test(enable = "avx")]
3334	unsafe fn test_mm256_floor_pd() {
3335	let a = _mm256_setr_pd(`1.55`, `2.2`, `3.99`, `-1.2`);
3336	let result_down = _mm256_floor_pd(a);
3337	let expected_down = _mm256_setr_pd(`1.`, `2.`, `3.`, `-2.`);
3338	assert_eq_m256d(result_down, expected_down);
3339	}
3340
3341	#[simd_test(enable = "avx")]
3342	unsafe fn test_mm256_ceil_pd() {
3343	let a = _mm256_setr_pd(`1.55`, `2.2`, `3.99`, `-1.2`);
3344	let result_up = _mm256_ceil_pd(a);
3345	let expected_up = _mm256_setr_pd(`2.`, `3.`, `4.`, `-1.`);
3346	assert_eq_m256d(result_up, expected_up);
3347	}
3348
3349	#[simd_test(enable = "avx")]
3350	unsafe fn test_mm256_round_ps() {
3351	let a = _mm256_setr_ps(`1.55`, `2.2`, `3.99`, `-1.2`, `1.55`, `2.2`, `3.99`, `-1.2`);
3352	let result_closest = _mm256_round_ps::<`0b0000`>(a);
3353	let result_down = _mm256_round_ps::<`0b0001`>(a);
3354	let result_up = _mm256_round_ps::<`0b0010`>(a);
3355	let expected_closest = _mm256_setr_ps(`2.`, `2.`, `4.`, `-1.`, `2.`, `2.`, `4.`, `-1.`);
3356	let expected_down = _mm256_setr_ps(`1.`, `2.`, `3.`, `-2.`, `1.`, `2.`, `3.`, `-2.`);
3357	let expected_up = _mm256_setr_ps(`2.`, `3.`, `4.`, `-1.`, `2.`, `3.`, `4.`, `-1.`);
3358	assert_eq_m256(result_closest, expected_closest);
3359	assert_eq_m256(result_down, expected_down);
3360	assert_eq_m256(result_up, expected_up);
3361	}
3362
3363	#[simd_test(enable = "avx")]
3364	unsafe fn test_mm256_floor_ps() {
3365	let a = _mm256_setr_ps(`1.55`, `2.2`, `3.99`, `-1.2`, `1.55`, `2.2`, `3.99`, `-1.2`);
3366	let result_down = _mm256_floor_ps(a);
3367	let expected_down = _mm256_setr_ps(`1.`, `2.`, `3.`, `-2.`, `1.`, `2.`, `3.`, `-2.`);
3368	assert_eq_m256(result_down, expected_down);
3369	}
3370
3371	#[simd_test(enable = "avx")]
3372	unsafe fn test_mm256_ceil_ps() {
3373	let a = _mm256_setr_ps(`1.55`, `2.2`, `3.99`, `-1.2`, `1.55`, `2.2`, `3.99`, `-1.2`);
3374	let result_up = _mm256_ceil_ps(a);
3375	let expected_up = _mm256_setr_ps(`2.`, `3.`, `4.`, `-1.`, `2.`, `3.`, `4.`, `-1.`);
3376	assert_eq_m256(result_up, expected_up);
3377	}
3378
3379	#[simd_test(enable = "avx")]
3380	unsafe fn test_mm256_sqrt_pd() {
3381	let a = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3382	let r = _mm256_sqrt_pd(a);
3383	let e = _mm256_setr_pd(`2.`, `3.`, `4.`, `5.`);
3384	assert_eq_m256d(r, e);
3385	}
3386
3387	#[simd_test(enable = "avx")]
3388	unsafe fn test_mm256_sqrt_ps() {
3389	let a = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `4.`, `9.`, `16.`, `25.`);
3390	let r = _mm256_sqrt_ps(a);
3391	let e = _mm256_setr_ps(`2.`, `3.`, `4.`, `5.`, `2.`, `3.`, `4.`, `5.`);
3392	assert_eq_m256(r, e);
3393	}
3394
3395	#[simd_test(enable = "avx")]
3396	unsafe fn test_mm256_div_ps() {
3397	let a = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `4.`, `9.`, `16.`, `25.`);
3398	let b = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3399	let r = _mm256_div_ps(a, b);
3400	let e = _mm256_setr_ps(`1.`, `3.`, `8.`, `5.`, `0.5`, `1.`, `0.25`, `0.5`);
3401	assert_eq_m256(r, e);
3402	}
3403
3404	#[simd_test(enable = "avx")]
3405	unsafe fn test_mm256_div_pd() {
3406	let a = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3407	let b = _mm256_setr_pd(`4.`, `3.`, `2.`, `5.`);
3408	let r = _mm256_div_pd(a, b);
3409	let e = _mm256_setr_pd(`1.`, `3.`, `8.`, `5.`);
3410	assert_eq_m256d(r, e);
3411	}
3412
3413	#[simd_test(enable = "avx")]
3414	unsafe fn test_mm256_blend_pd() {
3415	let a = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3416	let b = _mm256_setr_pd(`4.`, `3.`, `2.`, `5.`);
3417	let r = _mm256_blend_pd::<`0x0`>(a, b);
3418	assert_eq_m256d(r, _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`));
3419	let r = _mm256_blend_pd::<`0x3`>(a, b);
3420	assert_eq_m256d(r, _mm256_setr_pd(`4.`, `3.`, `16.`, `25.`));
3421	let r = _mm256_blend_pd::<`0xF`>(a, b);
3422	assert_eq_m256d(r, _mm256_setr_pd(`4.`, `3.`, `2.`, `5.`));
3423	}
3424
3425	#[simd_test(enable = "avx")]
3426	unsafe fn test_mm256_blend_ps() {
3427	let a = _mm256_setr_ps(`1.`, `4.`, `5.`, `8.`, `9.`, `12.`, `13.`, `16.`);
3428	let b = _mm256_setr_ps(`2.`, `3.`, `6.`, `7.`, `10.`, `11.`, `14.`, `15.`);
3429	let r = _mm256_blend_ps::<`0x0`>(a, b);
3430	assert_eq_m256(r, _mm256_setr_ps(`1.`, `4.`, `5.`, `8.`, `9.`, `12.`, `13.`, `16.`));
3431	let r = _mm256_blend_ps::<`0x3`>(a, b);
3432	assert_eq_m256(r, _mm256_setr_ps(`2.`, `3.`, `5.`, `8.`, `9.`, `12.`, `13.`, `16.`));
3433	let r = _mm256_blend_ps::<`0xF`>(a, b);
3434	assert_eq_m256(r, _mm256_setr_ps(`2.`, `3.`, `6.`, `7.`, `9.`, `12.`, `13.`, `16.`));
3435	}
3436
3437	#[simd_test(enable = "avx")]
3438	unsafe fn test_mm256_blendv_pd() {
3439	let a = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3440	let b = _mm256_setr_pd(`4.`, `3.`, `2.`, `5.`);
3441	let c = _mm256_setr_pd(`0.`, `0.`, !`0` as f64, !`0` as f64);
3442	let r = _mm256_blendv_pd(a, b, c);
3443	let e = _mm256_setr_pd(`4.`, `9.`, `2.`, `5.`);
3444	assert_eq_m256d(r, e);
3445	}
3446
3447	#[simd_test(enable = "avx")]
3448	unsafe fn test_mm256_blendv_ps() {
3449	let a = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `4.`, `9.`, `16.`, `25.`);
3450	let b = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3451	#[rustfmt::skip]
3452	let c = _mm256_setr_ps(
3453	`0.`, `0.`, `0.`, `0.`, !`0` as f32, !`0` as f32, !`0` as f32, !`0` as f32,
3454	);
3455	let r = _mm256_blendv_ps(a, b, c);
3456	let e = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `8.`, `9.`, `64.`, `50.`);
3457	assert_eq_m256(r, e);
3458	}
3459
3460	#[simd_test(enable = "avx")]
3461	unsafe fn test_mm256_dp_ps() {
3462	let a = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `4.`, `9.`, `16.`, `25.`);
3463	let b = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3464	let r = _mm256_dp_ps::<`0xFF`>(a, b);
3465	let e = _mm256_setr_ps(`200.`, `200.`, `200.`, `200.`, `2387.`, `2387.`, `2387.`, `2387.`);
3466	assert_eq_m256(r, e);
3467	}
3468
3469	#[simd_test(enable = "avx")]
3470	unsafe fn test_mm256_hadd_pd() {
3471	let a = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3472	let b = _mm256_setr_pd(`4.`, `3.`, `2.`, `5.`);
3473	let r = _mm256_hadd_pd(a, b);
3474	let e = _mm256_setr_pd(`13.`, `7.`, `41.`, `7.`);
3475	assert_eq_m256d(r, e);
3476
3477	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3478	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
3479	let r = _mm256_hadd_pd(a, b);
3480	let e = _mm256_setr_pd(`3.`, `11.`, `7.`, `15.`);
3481	assert_eq_m256d(r, e);
3482	}
3483
3484	#[simd_test(enable = "avx")]
3485	unsafe fn test_mm256_hadd_ps() {
3486	let a = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `4.`, `9.`, `16.`, `25.`);
3487	let b = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3488	let r = _mm256_hadd_ps(a, b);
3489	let e = _mm256_setr_ps(`13.`, `41.`, `7.`, `7.`, `13.`, `41.`, `17.`, `114.`);
3490	assert_eq_m256(r, e);
3491
3492	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `1.`, `2.`, `3.`, `4.`);
3493	let b = _mm256_setr_ps(`5.`, `6.`, `7.`, `8.`, `5.`, `6.`, `7.`, `8.`);
3494	let r = _mm256_hadd_ps(a, b);
3495	let e = _mm256_setr_ps(`3.`, `7.`, `11.`, `15.`, `3.`, `7.`, `11.`, `15.`);
3496	assert_eq_m256(r, e);
3497	}
3498
3499	#[simd_test(enable = "avx")]
3500	unsafe fn test_mm256_hsub_pd() {
3501	let a = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3502	let b = _mm256_setr_pd(`4.`, `3.`, `2.`, `5.`);
3503	let r = _mm256_hsub_pd(a, b);
3504	let e = _mm256_setr_pd(`-5.`, `1.`, `-9.`, `-3.`);
3505	assert_eq_m256d(r, e);
3506
3507	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3508	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
3509	let r = _mm256_hsub_pd(a, b);
3510	let e = _mm256_setr_pd(`-1.`, `-1.`, `-1.`, `-1.`);
3511	assert_eq_m256d(r, e);
3512	}
3513
3514	#[simd_test(enable = "avx")]
3515	unsafe fn test_mm256_hsub_ps() {
3516	let a = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `4.`, `9.`, `16.`, `25.`);
3517	let b = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3518	let r = _mm256_hsub_ps(a, b);
3519	let e = _mm256_setr_ps(`-5.`, `-9.`, `1.`, `-3.`, `-5.`, `-9.`, `-1.`, `14.`);
3520	assert_eq_m256(r, e);
3521
3522	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `1.`, `2.`, `3.`, `4.`);
3523	let b = _mm256_setr_ps(`5.`, `6.`, `7.`, `8.`, `5.`, `6.`, `7.`, `8.`);
3524	let r = _mm256_hsub_ps(a, b);
3525	let e = _mm256_setr_ps(`-1.`, `-1.`, `-1.`, `-1.`, `-1.`, `-1.`, `-1.`, `-1.`);
3526	assert_eq_m256(r, e);
3527	}
3528
3529	#[simd_test(enable = "avx")]
3530	unsafe fn test_mm256_xor_pd() {
3531	let a = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3532	let b = _mm256_set1_pd(`0.`);
3533	let r = _mm256_xor_pd(a, b);
3534	assert_eq_m256d(r, a);
3535	}
3536
3537	#[simd_test(enable = "avx")]
3538	unsafe fn test_mm256_xor_ps() {
3539	let a = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `4.`, `9.`, `16.`, `25.`);
3540	let b = _mm256_set1_ps(`0.`);
3541	let r = _mm256_xor_ps(a, b);
3542	assert_eq_m256(r, a);
3543	}
3544
3545	#[simd_test(enable = "avx")]
3546	unsafe fn test_mm_cmp_pd() {
3547	let a = _mm_setr_pd(`4.`, `9.`);
3548	let b = _mm_setr_pd(`4.`, `3.`);
3549	let r = _mm_cmp_pd::<_CMP_GE_OS>(a, b);
3550	assert!(get_m128d(r, `0`).is_nan());
3551	assert!(get_m128d(r, `1`).is_nan());
3552	}
3553
3554	#[simd_test(enable = "avx")]
3555	unsafe fn test_mm256_cmp_pd() {
3556	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3557	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
3558	let r = _mm256_cmp_pd::<_CMP_GE_OS>(a, b);
3559	let e = _mm256_set1_pd(`0.`);
3560	assert_eq_m256d(r, e);
3561	}
3562
3563	#[simd_test(enable = "avx")]
3564	unsafe fn test_mm_cmp_ps() {
3565	let a = _mm_setr_ps(`4.`, `3.`, `2.`, `5.`);
3566	let b = _mm_setr_ps(`4.`, `9.`, `16.`, `25.`);
3567	let r = _mm_cmp_ps::<_CMP_GE_OS>(a, b);
3568	assert!(get_m128(r, `0`).is_nan());
3569	assert_eq!(get_m128(r, `1`), `0.`);
3570	assert_eq!(get_m128(r, `2`), `0.`);
3571	assert_eq!(get_m128(r, `3`), `0.`);
3572	}
3573
3574	#[simd_test(enable = "avx")]
3575	unsafe fn test_mm256_cmp_ps() {
3576	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `1.`, `2.`, `3.`, `4.`);
3577	let b = _mm256_setr_ps(`5.`, `6.`, `7.`, `8.`, `5.`, `6.`, `7.`, `8.`);
3578	let r = _mm256_cmp_ps::<_CMP_GE_OS>(a, b);
3579	let e = _mm256_set1_ps(`0.`);
3580	assert_eq_m256(r, e);
3581	}
3582
3583	#[simd_test(enable = "avx")]
3584	unsafe fn test_mm_cmp_sd() {
3585	let a = _mm_setr_pd(`4.`, `9.`);
3586	let b = _mm_setr_pd(`4.`, `3.`);
3587	let r = _mm_cmp_sd::<_CMP_GE_OS>(a, b);
3588	assert!(get_m128d(r, `0`).is_nan());
3589	assert_eq!(get_m128d(r, `1`), `9.`);
3590	}
3591
3592	#[simd_test(enable = "avx")]
3593	unsafe fn test_mm_cmp_ss() {
3594	let a = _mm_setr_ps(`4.`, `3.`, `2.`, `5.`);
3595	let b = _mm_setr_ps(`4.`, `9.`, `16.`, `25.`);
3596	let r = _mm_cmp_ss::<_CMP_GE_OS>(a, b);
3597	assert!(get_m128(r, `0`).is_nan());
3598	assert_eq!(get_m128(r, `1`), `3.`);
3599	assert_eq!(get_m128(r, `2`), `2.`);
3600	assert_eq!(get_m128(r, `3`), `5.`);
3601	}
3602
3603	#[simd_test(enable = "avx")]
3604	unsafe fn test_mm256_cvtepi32_pd() {
3605	let a = _mm_setr_epi32(`4`, `9`, `16`, `25`);
3606	let r = _mm256_cvtepi32_pd(a);
3607	let e = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3608	assert_eq_m256d(r, e);
3609	}
3610
3611	#[simd_test(enable = "avx")]
3612	unsafe fn test_mm256_cvtepi32_ps() {
3613	let a = _mm256_setr_epi32(`4`, `9`, `16`, `25`, `4`, `9`, `16`, `25`);
3614	let r = _mm256_cvtepi32_ps(a);
3615	let e = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `4.`, `9.`, `16.`, `25.`);
3616	assert_eq_m256(r, e);
3617	}
3618
3619	#[simd_test(enable = "avx")]
3620	unsafe fn test_mm256_cvtpd_ps() {
3621	let a = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3622	let r = _mm256_cvtpd_ps(a);
3623	let e = _mm_setr_ps(`4.`, `9.`, `16.`, `25.`);
3624	assert_eq_m128(r, e);
3625	}
3626
3627	#[simd_test(enable = "avx")]
3628	unsafe fn test_mm256_cvtps_epi32() {
3629	let a = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `4.`, `9.`, `16.`, `25.`);
3630	let r = _mm256_cvtps_epi32(a);
3631	let e = _mm256_setr_epi32(`4`, `9`, `16`, `25`, `4`, `9`, `16`, `25`);
3632	assert_eq_m256i(r, e);
3633	}
3634
3635	#[simd_test(enable = "avx")]
3636	unsafe fn test_mm256_cvtps_pd() {
3637	let a = _mm_setr_ps(`4.`, `9.`, `16.`, `25.`);
3638	let r = _mm256_cvtps_pd(a);
3639	let e = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3640	assert_eq_m256d(r, e);
3641	}
3642
3643	#[simd_test(enable = "avx")]
3644	unsafe fn test_mm256_cvttpd_epi32() {
3645	let a = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3646	let r = _mm256_cvttpd_epi32(a);
3647	let e = _mm_setr_epi32(`4`, `9`, `16`, `25`);
3648	assert_eq_m128i(r, e);
3649	}
3650
3651	#[simd_test(enable = "avx")]
3652	unsafe fn test_mm256_cvtpd_epi32() {
3653	let a = _mm256_setr_pd(`4.`, `9.`, `16.`, `25.`);
3654	let r = _mm256_cvtpd_epi32(a);
3655	let e = _mm_setr_epi32(`4`, `9`, `16`, `25`);
3656	assert_eq_m128i(r, e);
3657	}
3658
3659	#[simd_test(enable = "avx")]
3660	unsafe fn test_mm256_cvttps_epi32() {
3661	let a = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `4.`, `9.`, `16.`, `25.`);
3662	let r = _mm256_cvttps_epi32(a);
3663	let e = _mm256_setr_epi32(`4`, `9`, `16`, `25`, `4`, `9`, `16`, `25`);
3664	assert_eq_m256i(r, e);
3665	}
3666
3667	#[simd_test(enable = "avx")]
3668	unsafe fn test_mm256_extractf128_ps() {
3669	let a = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3670	let r = _mm256_extractf128_ps::<`0`>(a);
3671	let e = _mm_setr_ps(`4.`, `3.`, `2.`, `5.`);
3672	assert_eq_m128(r, e);
3673	}
3674
3675	#[simd_test(enable = "avx")]
3676	unsafe fn test_mm256_extractf128_pd() {
3677	let a = _mm256_setr_pd(`4.`, `3.`, `2.`, `5.`);
3678	let r = _mm256_extractf128_pd::<`0`>(a);
3679	let e = _mm_setr_pd(`4.`, `3.`);
3680	assert_eq_m128d(r, e);
3681	}
3682
3683	#[simd_test(enable = "avx")]
3684	unsafe fn test_mm256_extractf128_si256() {
3685	let a = _mm256_setr_epi64x(`4`, `3`, `2`, `5`);
3686	let r = _mm256_extractf128_si256::<`0`>(a);
3687	let e = _mm_setr_epi64x(`4`, `3`);
3688	assert_eq_m128i(r, e);
3689	}
3690
3691	#[simd_test(enable = "avx")]
3692	unsafe fn test_mm256_zeroall() {
3693	_mm256_zeroall();
3694	}
3695
3696	#[simd_test(enable = "avx")]
3697	unsafe fn test_mm256_zeroupper() {
3698	_mm256_zeroupper();
3699	}
3700
3701	#[simd_test(enable = "avx")]
3702	unsafe fn test_mm256_permutevar_ps() {
3703	let a = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3704	let b = _mm256_setr_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
3705	let r = _mm256_permutevar_ps(a, b);
3706	let e = _mm256_setr_ps(`3.`, `2.`, `5.`, `4.`, `9.`, `64.`, `50.`, `8.`);
3707	assert_eq_m256(r, e);
3708	}
3709
3710	#[simd_test(enable = "avx")]
3711	unsafe fn test_mm_permutevar_ps() {
3712	let a = _mm_setr_ps(`4.`, `3.`, `2.`, `5.`);
3713	let b = _mm_setr_epi32(`1`, `2`, `3`, `4`);
3714	let r = _mm_permutevar_ps(a, b);
3715	let e = _mm_setr_ps(`3.`, `2.`, `5.`, `4.`);
3716	assert_eq_m128(r, e);
3717	}
3718
3719	#[simd_test(enable = "avx")]
3720	unsafe fn test_mm256_permute_ps() {
3721	let a = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3722	let r = _mm256_permute_ps::<`0x1b`>(a);
3723	let e = _mm256_setr_ps(`5.`, `2.`, `3.`, `4.`, `50.`, `64.`, `9.`, `8.`);
3724	assert_eq_m256(r, e);
3725	}
3726
3727	#[simd_test(enable = "avx")]
3728	unsafe fn test_mm_permute_ps() {
3729	let a = _mm_setr_ps(`4.`, `3.`, `2.`, `5.`);
3730	let r = _mm_permute_ps::<`0x1b`>(a);
3731	let e = _mm_setr_ps(`5.`, `2.`, `3.`, `4.`);
3732	assert_eq_m128(r, e);
3733	}
3734
3735	#[simd_test(enable = "avx")]
3736	unsafe fn test_mm256_permutevar_pd() {
3737	let a = _mm256_setr_pd(`4.`, `3.`, `2.`, `5.`);
3738	let b = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
3739	let r = _mm256_permutevar_pd(a, b);
3740	let e = _mm256_setr_pd(`4.`, `3.`, `5.`, `2.`);
3741	assert_eq_m256d(r, e);
3742	}
3743
3744	#[simd_test(enable = "avx")]
3745	unsafe fn test_mm_permutevar_pd() {
3746	let a = _mm_setr_pd(`4.`, `3.`);
3747	let b = _mm_setr_epi64x(`3`, `0`);
3748	let r = _mm_permutevar_pd(a, b);
3749	let e = _mm_setr_pd(`3.`, `4.`);
3750	assert_eq_m128d(r, e);
3751	}
3752
3753	#[simd_test(enable = "avx")]
3754	unsafe fn test_mm256_permute_pd() {
3755	let a = _mm256_setr_pd(`4.`, `3.`, `2.`, `5.`);
3756	let r = _mm256_permute_pd::<`5`>(a);
3757	let e = _mm256_setr_pd(`3.`, `4.`, `5.`, `2.`);
3758	assert_eq_m256d(r, e);
3759	}
3760
3761	#[simd_test(enable = "avx")]
3762	unsafe fn test_mm_permute_pd() {
3763	let a = _mm_setr_pd(`4.`, `3.`);
3764	let r = _mm_permute_pd::<`1`>(a);
3765	let e = _mm_setr_pd(`3.`, `4.`);
3766	assert_eq_m128d(r, e);
3767	}
3768
3769	#[simd_test(enable = "avx")]
3770	unsafe fn test_mm256_permute2f128_ps() {
3771	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `1.`, `2.`, `3.`, `4.`);
3772	let b = _mm256_setr_ps(`5.`, `6.`, `7.`, `8.`, `5.`, `6.`, `7.`, `8.`);
3773	let r = _mm256_permute2f128_ps::<`0x13`>(a, b);
3774	let e = _mm256_setr_ps(`5.`, `6.`, `7.`, `8.`, `1.`, `2.`, `3.`, `4.`);
3775	assert_eq_m256(r, e);
3776	}
3777
3778	#[simd_test(enable = "avx")]
3779	unsafe fn test_mm256_permute2f128_pd() {
3780	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3781	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
3782	let r = _mm256_permute2f128_pd::<`0x31`>(a, b);
3783	let e = _mm256_setr_pd(`3.`, `4.`, `7.`, `8.`);
3784	assert_eq_m256d(r, e);
3785	}
3786
3787	#[simd_test(enable = "avx")]
3788	unsafe fn test_mm256_permute2f128_si256() {
3789	let a = _mm256_setr_epi32(`1`, `2`, `3`, `4`, `1`, `2`, `3`, `4`);
3790	let b = _mm256_setr_epi32(`5`, `6`, `7`, `8`, `5`, `6`, `7`, `8`);
3791	let r = _mm256_permute2f128_si256::<`0x20`>(a, b);
3792	let e = _mm256_setr_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
3793	assert_eq_m256i(r, e);
3794	}
3795
3796	#[simd_test(enable = "avx")]
3797	unsafe fn test_mm256_broadcast_ss() {
3798	let r = _mm256_broadcast_ss(&`3.`);
3799	let e = _mm256_set1_ps(`3.`);
3800	assert_eq_m256(r, e);
3801	}
3802
3803	#[simd_test(enable = "avx")]
3804	unsafe fn test_mm_broadcast_ss() {
3805	let r = _mm_broadcast_ss(&`3.`);
3806	let e = _mm_set1_ps(`3.`);
3807	assert_eq_m128(r, e);
3808	}
3809
3810	#[simd_test(enable = "avx")]
3811	unsafe fn test_mm256_broadcast_sd() {
3812	let r = _mm256_broadcast_sd(&`3.`);
3813	let e = _mm256_set1_pd(`3.`);
3814	assert_eq_m256d(r, e);
3815	}
3816
3817	#[simd_test(enable = "avx")]
3818	unsafe fn test_mm256_broadcast_ps() {
3819	let a = _mm_setr_ps(`4.`, `3.`, `2.`, `5.`);
3820	let r = _mm256_broadcast_ps(&a);
3821	let e = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `4.`, `3.`, `2.`, `5.`);
3822	assert_eq_m256(r, e);
3823	}
3824
3825	#[simd_test(enable = "avx")]
3826	unsafe fn test_mm256_broadcast_pd() {
3827	let a = _mm_setr_pd(`4.`, `3.`);
3828	let r = _mm256_broadcast_pd(&a);
3829	let e = _mm256_setr_pd(`4.`, `3.`, `4.`, `3.`);
3830	assert_eq_m256d(r, e);
3831	}
3832
3833	#[simd_test(enable = "avx")]
3834	unsafe fn test_mm256_insertf128_ps() {
3835	let a = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3836	let b = _mm_setr_ps(`4.`, `9.`, `16.`, `25.`);
3837	let r = _mm256_insertf128_ps::<`0`>(a, b);
3838	let e = _mm256_setr_ps(`4.`, `9.`, `16.`, `25.`, `8.`, `9.`, `64.`, `50.`);
3839	assert_eq_m256(r, e);
3840	}
3841
3842	#[simd_test(enable = "avx")]
3843	unsafe fn test_mm256_insertf128_pd() {
3844	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3845	let b = _mm_setr_pd(`5.`, `6.`);
3846	let r = _mm256_insertf128_pd::<`0`>(a, b);
3847	let e = _mm256_setr_pd(`5.`, `6.`, `3.`, `4.`);
3848	assert_eq_m256d(r, e);
3849	}
3850
3851	#[simd_test(enable = "avx")]
3852	unsafe fn test_mm256_insertf128_si256() {
3853	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
3854	let b = _mm_setr_epi64x(`5`, `6`);
3855	let r = _mm256_insertf128_si256::<`0`>(a, b);
3856	let e = _mm256_setr_epi64x(`5`, `6`, `3`, `4`);
3857	assert_eq_m256i(r, e);
3858	}
3859
3860	#[simd_test(enable = "avx")]
3861	unsafe fn test_mm256_insert_epi8() {
3862	#[rustfmt::skip]
3863	let a = _mm256_setr_epi8(
3864	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
3865	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
3866	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
3867	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
3868	);
3869	let r = _mm256_insert_epi8::<`31`>(a, `0`);
3870	#[rustfmt::skip]
3871	let e = _mm256_setr_epi8(
3872	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
3873	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
3874	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
3875	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `0`,
3876	);
3877	assert_eq_m256i(r, e);
3878	}
3879
3880	#[simd_test(enable = "avx")]
3881	unsafe fn test_mm256_insert_epi16() {
3882	#[rustfmt::skip]
3883	let a = _mm256_setr_epi16(
3884	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
3885	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
3886	);
3887	let r = _mm256_insert_epi16::<`15`>(a, `0`);
3888	#[rustfmt::skip]
3889	let e = _mm256_setr_epi16(
3890	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
3891	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `0`,
3892	);
3893	assert_eq_m256i(r, e);
3894	}
3895
3896	#[simd_test(enable = "avx")]
3897	unsafe fn test_mm256_insert_epi32() {
3898	let a = _mm256_setr_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
3899	let r = _mm256_insert_epi32::<`7`>(a, `0`);
3900	let e = _mm256_setr_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `0`);
3901	assert_eq_m256i(r, e);
3902	}
3903
3904	#[simd_test(enable = "avx")]
3905	unsafe fn test_mm256_load_pd() {
3906	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3907	let p = ptr::addr_of!(a) as *const f64;
3908	let r = _mm256_load_pd(p);
3909	let e = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3910	assert_eq_m256d(r, e);
3911	}
3912
3913	#[simd_test(enable = "avx")]
3914	unsafe fn test_mm256_store_pd() {
3915	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3916	let mut r = _mm256_undefined_pd();
3917	_mm256_store_pd(ptr::addr_of_mut!(r) as *mut f64, a);
3918	assert_eq_m256d(r, a);
3919	}
3920
3921	#[simd_test(enable = "avx")]
3922	unsafe fn test_mm256_load_ps() {
3923	let a = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3924	let p = ptr::addr_of!(a) as *const f32;
3925	let r = _mm256_load_ps(p);
3926	let e = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3927	assert_eq_m256(r, e);
3928	}
3929
3930	#[simd_test(enable = "avx")]
3931	unsafe fn test_mm256_store_ps() {
3932	let a = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3933	let mut r = _mm256_undefined_ps();
3934	_mm256_store_ps(ptr::addr_of_mut!(r) as *mut f32, a);
3935	assert_eq_m256(r, a);
3936	}
3937
3938	#[simd_test(enable = "avx")]
3939	unsafe fn test_mm256_loadu_pd() {
3940	let a = &[`1.0f64`, `2.`, `3.`, `4.`];
3941	let p = a.as_ptr();
3942	let r = _mm256_loadu_pd(black_box(p));
3943	let e = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
3944	assert_eq_m256d(r, e);
3945	}
3946
3947	#[simd_test(enable = "avx")]
3948	unsafe fn test_mm256_storeu_pd() {
3949	let a = _mm256_set1_pd(`9.`);
3950	let mut r = _mm256_undefined_pd();
3951	_mm256_storeu_pd(ptr::addr_of_mut!(r) as *mut f64, a);
3952	assert_eq_m256d(r, a);
3953	}
3954
3955	#[simd_test(enable = "avx")]
3956	unsafe fn test_mm256_loadu_ps() {
3957	let a = &[`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`];
3958	let p = a.as_ptr();
3959	let r = _mm256_loadu_ps(black_box(p));
3960	let e = _mm256_setr_ps(`4.`, `3.`, `2.`, `5.`, `8.`, `9.`, `64.`, `50.`);
3961	assert_eq_m256(r, e);
3962	}
3963
3964	#[simd_test(enable = "avx")]
3965	unsafe fn test_mm256_storeu_ps() {
3966	let a = _mm256_set1_ps(`9.`);
3967	let mut r = _mm256_undefined_ps();
3968	_mm256_storeu_ps(ptr::addr_of_mut!(r) as *mut f32, a);
3969	assert_eq_m256(r, a);
3970	}
3971
3972	#[simd_test(enable = "avx")]
3973	unsafe fn test_mm256_load_si256() {
3974	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
3975	let p = ptr::addr_of!(a);
3976	let r = _mm256_load_si256(p);
3977	let e = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
3978	assert_eq_m256i(r, e);
3979	}
3980
3981	#[simd_test(enable = "avx")]
3982	unsafe fn test_mm256_store_si256() {
3983	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
3984	let mut r = _mm256_undefined_si256();
3985	_mm256_store_si256(ptr::addr_of_mut!(r), a);
3986	assert_eq_m256i(r, a);
3987	}
3988
3989	#[simd_test(enable = "avx")]
3990	unsafe fn test_mm256_loadu_si256() {
3991	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
3992	let p = ptr::addr_of!(a);
3993	let r = _mm256_loadu_si256(black_box(p));
3994	let e = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
3995	assert_eq_m256i(r, e);
3996	}
3997
3998	#[simd_test(enable = "avx")]
3999	unsafe fn test_mm256_storeu_si256() {
4000	let a = _mm256_set1_epi8(`9`);
4001	let mut r = _mm256_undefined_si256();
4002	_mm256_storeu_si256(ptr::addr_of_mut!(r), a);
4003	assert_eq_m256i(r, a);
4004	}
4005
4006	#[simd_test(enable = "avx")]
4007	unsafe fn test_mm256_maskload_pd() {
4008	let a = &[`1.0f64`, `2.`, `3.`, `4.`];
4009	let p = a.as_ptr();
4010	let mask = _mm256_setr_epi64x(`0`, !`0`, `0`, !`0`);
4011	let r = _mm256_maskload_pd(black_box(p), mask);
4012	let e = _mm256_setr_pd(`0.`, `2.`, `0.`, `4.`);
4013	assert_eq_m256d(r, e);
4014	}
4015
4016	#[simd_test(enable = "avx")]
4017	unsafe fn test_mm256_maskstore_pd() {
4018	let mut r = _mm256_set1_pd(`0.`);
4019	let mask = _mm256_setr_epi64x(`0`, !`0`, `0`, !`0`);
4020	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4021	_mm256_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
4022	let e = _mm256_setr_pd(`0.`, `2.`, `0.`, `4.`);
4023	assert_eq_m256d(r, e);
4024	}
4025
4026	#[simd_test(enable = "avx")]
4027	unsafe fn test_mm_maskload_pd() {
4028	let a = &[`1.0f64`, `2.`];
4029	let p = a.as_ptr();
4030	let mask = _mm_setr_epi64x(`0`, !`0`);
4031	let r = _mm_maskload_pd(black_box(p), mask);
4032	let e = _mm_setr_pd(`0.`, `2.`);
4033	assert_eq_m128d(r, e);
4034	}
4035
4036	#[simd_test(enable = "avx")]
4037	unsafe fn test_mm_maskstore_pd() {
4038	let mut r = _mm_set1_pd(`0.`);
4039	let mask = _mm_setr_epi64x(`0`, !`0`);
4040	let a = _mm_setr_pd(`1.`, `2.`);
4041	_mm_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
4042	let e = _mm_setr_pd(`0.`, `2.`);
4043	assert_eq_m128d(r, e);
4044	}
4045
4046	#[simd_test(enable = "avx")]
4047	unsafe fn test_mm256_maskload_ps() {
4048	let a = &[`1.0f32`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`];
4049	let p = a.as_ptr();
4050	let mask = _mm256_setr_epi32(`0`, !`0`, `0`, !`0`, `0`, !`0`, `0`, !`0`);
4051	let r = _mm256_maskload_ps(black_box(p), mask);
4052	let e = _mm256_setr_ps(`0.`, `2.`, `0.`, `4.`, `0.`, `6.`, `0.`, `8.`);
4053	assert_eq_m256(r, e);
4054	}
4055
4056	#[simd_test(enable = "avx")]
4057	unsafe fn test_mm256_maskstore_ps() {
4058	let mut r = _mm256_set1_ps(`0.`);
4059	let mask = _mm256_setr_epi32(`0`, !`0`, `0`, !`0`, `0`, !`0`, `0`, !`0`);
4060	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4061	_mm256_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
4062	let e = _mm256_setr_ps(`0.`, `2.`, `0.`, `4.`, `0.`, `6.`, `0.`, `8.`);
4063	assert_eq_m256(r, e);
4064	}
4065
4066	#[simd_test(enable = "avx")]
4067	unsafe fn test_mm_maskload_ps() {
4068	let a = &[`1.0f32`, `2.`, `3.`, `4.`];
4069	let p = a.as_ptr();
4070	let mask = _mm_setr_epi32(`0`, !`0`, `0`, !`0`);
4071	let r = _mm_maskload_ps(black_box(p), mask);
4072	let e = _mm_setr_ps(`0.`, `2.`, `0.`, `4.`);
4073	assert_eq_m128(r, e);
4074	}
4075
4076	#[simd_test(enable = "avx")]
4077	unsafe fn test_mm_maskstore_ps() {
4078	let mut r = _mm_set1_ps(`0.`);
4079	let mask = _mm_setr_epi32(`0`, !`0`, `0`, !`0`);
4080	let a = _mm_setr_ps(`1.`, `2.`, `3.`, `4.`);
4081	_mm_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
4082	let e = _mm_setr_ps(`0.`, `2.`, `0.`, `4.`);
4083	assert_eq_m128(r, e);
4084	}
4085
4086	#[simd_test(enable = "avx")]
4087	unsafe fn test_mm256_movehdup_ps() {
4088	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4089	let r = _mm256_movehdup_ps(a);
4090	let e = _mm256_setr_ps(`2.`, `2.`, `4.`, `4.`, `6.`, `6.`, `8.`, `8.`);
4091	assert_eq_m256(r, e);
4092	}
4093
4094	#[simd_test(enable = "avx")]
4095	unsafe fn test_mm256_moveldup_ps() {
4096	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4097	let r = _mm256_moveldup_ps(a);
4098	let e = _mm256_setr_ps(`1.`, `1.`, `3.`, `3.`, `5.`, `5.`, `7.`, `7.`);
4099	assert_eq_m256(r, e);
4100	}
4101
4102	#[simd_test(enable = "avx")]
4103	unsafe fn test_mm256_movedup_pd() {
4104	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4105	let r = _mm256_movedup_pd(a);
4106	let e = _mm256_setr_pd(`1.`, `1.`, `3.`, `3.`);
4107	assert_eq_m256d(r, e);
4108	}
4109
4110	#[simd_test(enable = "avx")]
4111	unsafe fn test_mm256_lddqu_si256() {
4112	#[rustfmt::skip]
4113	let a = _mm256_setr_epi8(
4114	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4115	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4116	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4117	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
4118	);
4119	let p = ptr::addr_of!(a);
4120	let r = _mm256_lddqu_si256(black_box(p));
4121	#[rustfmt::skip]
4122	let e = _mm256_setr_epi8(
4123	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4124	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4125	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4126	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
4127	);
4128	assert_eq_m256i(r, e);
4129	}
4130
4131	#[simd_test(enable = "avx")]
4132	unsafe fn test_mm256_stream_si256() {
4133	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
4134	let mut r = _mm256_undefined_si256();
4135	_mm256_stream_si256(ptr::addr_of_mut!(r), a);
4136	assert_eq_m256i(r, a);
4137	}
4138
4139	#[simd_test(enable = "avx")]
4140	unsafe fn test_mm256_stream_pd() {
4141	#[repr(align(`32`))]
4142	struct Memory {
4143	pub data: [f64; `4`],
4144	}
4145	let a = _mm256_set1_pd(`7.0`);
4146	let mut mem = Memory { data: [`-1.0`; `4`] };
4147
4148	_mm256_stream_pd(ptr::addr_of_mut!(mem.data[`0`]), a);
4149	for i in `0`..`4` {
4150	assert_eq!(mem.data[i], get_m256d(a, i));
4151	}
4152	}
4153
4154	#[simd_test(enable = "avx")]
4155	unsafe fn test_mm256_stream_ps() {
4156	#[repr(align(`32`))]
4157	struct Memory {
4158	pub data: [f32; `8`],
4159	}
4160	let a = _mm256_set1_ps(`7.0`);
4161	let mut mem = Memory { data: [`-1.0`; `8`] };
4162
4163	_mm256_stream_ps(ptr::addr_of_mut!(mem.data[`0`]), a);
4164	for i in `0`..`8` {
4165	assert_eq!(mem.data[i], get_m256(a, i));
4166	}
4167	}
4168
4169	#[simd_test(enable = "avx")]
4170	unsafe fn test_mm256_rcp_ps() {
4171	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4172	let r = _mm256_rcp_ps(a);
4173	#[rustfmt::skip]
4174	let e = _mm256_setr_ps(
4175	`0.99975586`, `0.49987793`, `0.33325195`, `0.24993896`,
4176	`0.19995117`, `0.16662598`, `0.14282227`, `0.12496948`,
4177	);
4178	let rel_err = `0.00048828125`;
4179	for i in `0`..`8` {
4180	assert_approx_eq!(get_m256(r, i), get_m256(e, i), `2.` * rel_err);
4181	}
4182	}
4183
4184	#[simd_test(enable = "avx")]
4185	unsafe fn test_mm256_rsqrt_ps() {
4186	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4187	let r = _mm256_rsqrt_ps(a);
4188	#[rustfmt::skip]
4189	let e = _mm256_setr_ps(
4190	`0.99975586`, `0.7069092`, `0.5772705`, `0.49987793`,
4191	`0.44714355`, `0.40820313`, `0.3779297`, `0.3534546`,
4192	);
4193	let rel_err = `0.00048828125`;
4194	for i in `0`..`8` {
4195	assert_approx_eq!(get_m256(r, i), get_m256(e, i), `2.` * rel_err);
4196	}
4197	}
4198
4199	#[simd_test(enable = "avx")]
4200	unsafe fn test_mm256_unpackhi_pd() {
4201	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4202	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
4203	let r = _mm256_unpackhi_pd(a, b);
4204	let e = _mm256_setr_pd(`2.`, `6.`, `4.`, `8.`);
4205	assert_eq_m256d(r, e);
4206	}
4207
4208	#[simd_test(enable = "avx")]
4209	unsafe fn test_mm256_unpackhi_ps() {
4210	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4211	let b = _mm256_setr_ps(`9.`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
4212	let r = _mm256_unpackhi_ps(a, b);
4213	let e = _mm256_setr_ps(`3.`, `11.`, `4.`, `12.`, `7.`, `15.`, `8.`, `16.`);
4214	assert_eq_m256(r, e);
4215	}
4216
4217	#[simd_test(enable = "avx")]
4218	unsafe fn test_mm256_unpacklo_pd() {
4219	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4220	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
4221	let r = _mm256_unpacklo_pd(a, b);
4222	let e = _mm256_setr_pd(`1.`, `5.`, `3.`, `7.`);
4223	assert_eq_m256d(r, e);
4224	}
4225
4226	#[simd_test(enable = "avx")]
4227	unsafe fn test_mm256_unpacklo_ps() {
4228	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4229	let b = _mm256_setr_ps(`9.`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
4230	let r = _mm256_unpacklo_ps(a, b);
4231	let e = _mm256_setr_ps(`1.`, `9.`, `2.`, `10.`, `5.`, `13.`, `6.`, `14.`);
4232	assert_eq_m256(r, e);
4233	}
4234
4235	#[simd_test(enable = "avx")]
4236	unsafe fn test_mm256_testz_si256() {
4237	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
4238	let b = _mm256_setr_epi64x(`5`, `6`, `7`, `8`);
4239	let r = _mm256_testz_si256(a, b);
4240	assert_eq!(r, `0`);
4241	let b = _mm256_set1_epi64x(`0`);
4242	let r = _mm256_testz_si256(a, b);
4243	assert_eq!(r, `1`);
4244	}
4245
4246	#[simd_test(enable = "avx")]
4247	unsafe fn test_mm256_testc_si256() {
4248	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
4249	let b = _mm256_setr_epi64x(`5`, `6`, `7`, `8`);
4250	let r = _mm256_testc_si256(a, b);
4251	assert_eq!(r, `0`);
4252	let b = _mm256_set1_epi64x(`0`);
4253	let r = _mm256_testc_si256(a, b);
4254	assert_eq!(r, `1`);
4255	}
4256
4257	#[simd_test(enable = "avx")]
4258	unsafe fn test_mm256_testnzc_si256() {
4259	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
4260	let b = _mm256_setr_epi64x(`5`, `6`, `7`, `8`);
4261	let r = _mm256_testnzc_si256(a, b);
4262	assert_eq!(r, `1`);
4263	let a = _mm256_setr_epi64x(`0`, `0`, `0`, `0`);
4264	let b = _mm256_setr_epi64x(`0`, `0`, `0`, `0`);
4265	let r = _mm256_testnzc_si256(a, b);
4266	assert_eq!(r, `0`);
4267	}
4268
4269	#[simd_test(enable = "avx")]
4270	unsafe fn test_mm256_testz_pd() {
4271	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4272	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
4273	let r = _mm256_testz_pd(a, b);
4274	assert_eq!(r, `1`);
4275	let a = _mm256_set1_pd(`-1.`);
4276	let r = _mm256_testz_pd(a, a);
4277	assert_eq!(r, `0`);
4278	}
4279
4280	#[simd_test(enable = "avx")]
4281	unsafe fn test_mm256_testc_pd() {
4282	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4283	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
4284	let r = _mm256_testc_pd(a, b);
4285	assert_eq!(r, `1`);
4286	let a = _mm256_set1_pd(`1.`);
4287	let b = _mm256_set1_pd(`-1.`);
4288	let r = _mm256_testc_pd(a, b);
4289	assert_eq!(r, `0`);
4290	}
4291
4292	#[simd_test(enable = "avx")]
4293	unsafe fn test_mm256_testnzc_pd() {
4294	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4295	let b = _mm256_setr_pd(`5.`, `6.`, `7.`, `8.`);
4296	let r = _mm256_testnzc_pd(a, b);
4297	assert_eq!(r, `0`);
4298	let a = _mm256_setr_pd(`1.`, `-1.`, `-1.`, `-1.`);
4299	let b = _mm256_setr_pd(`-1.`, `-1.`, `1.`, `1.`);
4300	let r = _mm256_testnzc_pd(a, b);
4301	assert_eq!(r, `1`);
4302	}
4303
4304	#[simd_test(enable = "avx")]
4305	unsafe fn test_mm_testz_pd() {
4306	let a = _mm_setr_pd(`1.`, `2.`);
4307	let b = _mm_setr_pd(`5.`, `6.`);
4308	let r = _mm_testz_pd(a, b);
4309	assert_eq!(r, `1`);
4310	let a = _mm_set1_pd(`-1.`);
4311	let r = _mm_testz_pd(a, a);
4312	assert_eq!(r, `0`);
4313	}
4314
4315	#[simd_test(enable = "avx")]
4316	unsafe fn test_mm_testc_pd() {
4317	let a = _mm_setr_pd(`1.`, `2.`);
4318	let b = _mm_setr_pd(`5.`, `6.`);
4319	let r = _mm_testc_pd(a, b);
4320	assert_eq!(r, `1`);
4321	let a = _mm_set1_pd(`1.`);
4322	let b = _mm_set1_pd(`-1.`);
4323	let r = _mm_testc_pd(a, b);
4324	assert_eq!(r, `0`);
4325	}
4326
4327	#[simd_test(enable = "avx")]
4328	unsafe fn test_mm_testnzc_pd() {
4329	let a = _mm_setr_pd(`1.`, `2.`);
4330	let b = _mm_setr_pd(`5.`, `6.`);
4331	let r = _mm_testnzc_pd(a, b);
4332	assert_eq!(r, `0`);
4333	let a = _mm_setr_pd(`1.`, `-1.`);
4334	let b = _mm_setr_pd(`-1.`, `-1.`);
4335	let r = _mm_testnzc_pd(a, b);
4336	assert_eq!(r, `1`);
4337	}
4338
4339	#[simd_test(enable = "avx")]
4340	unsafe fn test_mm256_testz_ps() {
4341	let a = _mm256_set1_ps(`1.`);
4342	let r = _mm256_testz_ps(a, a);
4343	assert_eq!(r, `1`);
4344	let a = _mm256_set1_ps(`-1.`);
4345	let r = _mm256_testz_ps(a, a);
4346	assert_eq!(r, `0`);
4347	}
4348
4349	#[simd_test(enable = "avx")]
4350	unsafe fn test_mm256_testc_ps() {
4351	let a = _mm256_set1_ps(`1.`);
4352	let r = _mm256_testc_ps(a, a);
4353	assert_eq!(r, `1`);
4354	let b = _mm256_set1_ps(`-1.`);
4355	let r = _mm256_testc_ps(a, b);
4356	assert_eq!(r, `0`);
4357	}
4358
4359	#[simd_test(enable = "avx")]
4360	unsafe fn test_mm256_testnzc_ps() {
4361	let a = _mm256_set1_ps(`1.`);
4362	let r = _mm256_testnzc_ps(a, a);
4363	assert_eq!(r, `0`);
4364	let a = _mm256_setr_ps(`1.`, `-1.`, `-1.`, `-1.`, `-1.`, `-1.`, `-1.`, `-1.`);
4365	let b = _mm256_setr_ps(`-1.`, `-1.`, `1.`, `1.`, `1.`, `1.`, `1.`, `1.`);
4366	let r = _mm256_testnzc_ps(a, b);
4367	assert_eq!(r, `1`);
4368	}
4369
4370	#[simd_test(enable = "avx")]
4371	unsafe fn test_mm_testz_ps() {
4372	let a = _mm_set1_ps(`1.`);
4373	let r = _mm_testz_ps(a, a);
4374	assert_eq!(r, `1`);
4375	let a = _mm_set1_ps(`-1.`);
4376	let r = _mm_testz_ps(a, a);
4377	assert_eq!(r, `0`);
4378	}
4379
4380	#[simd_test(enable = "avx")]
4381	unsafe fn test_mm_testc_ps() {
4382	let a = _mm_set1_ps(`1.`);
4383	let r = _mm_testc_ps(a, a);
4384	assert_eq!(r, `1`);
4385	let b = _mm_set1_ps(`-1.`);
4386	let r = _mm_testc_ps(a, b);
4387	assert_eq!(r, `0`);
4388	}
4389
4390	#[simd_test(enable = "avx")]
4391	unsafe fn test_mm_testnzc_ps() {
4392	let a = _mm_set1_ps(`1.`);
4393	let r = _mm_testnzc_ps(a, a);
4394	assert_eq!(r, `0`);
4395	let a = _mm_setr_ps(`1.`, `-1.`, `-1.`, `-1.`);
4396	let b = _mm_setr_ps(`-1.`, `-1.`, `1.`, `1.`);
4397	let r = _mm_testnzc_ps(a, b);
4398	assert_eq!(r, `1`);
4399	}
4400
4401	#[simd_test(enable = "avx")]
4402	unsafe fn test_mm256_movemask_pd() {
4403	let a = _mm256_setr_pd(`1.`, `-2.`, `3.`, `-4.`);
4404	let r = _mm256_movemask_pd(a);
4405	assert_eq!(r, `0xA`);
4406	}
4407
4408	#[simd_test(enable = "avx")]
4409	unsafe fn test_mm256_movemask_ps() {
4410	let a = _mm256_setr_ps(`1.`, `-2.`, `3.`, `-4.`, `1.`, `-2.`, `3.`, `-4.`);
4411	let r = _mm256_movemask_ps(a);
4412	assert_eq!(r, `0xAA`);
4413	}
4414
4415	#[simd_test(enable = "avx")]
4416	unsafe fn test_mm256_setzero_pd() {
4417	let r = _mm256_setzero_pd();
4418	assert_eq_m256d(r, _mm256_set1_pd(`0.`));
4419	}
4420
4421	#[simd_test(enable = "avx")]
4422	unsafe fn test_mm256_setzero_ps() {
4423	let r = _mm256_setzero_ps();
4424	assert_eq_m256(r, _mm256_set1_ps(`0.`));
4425	}
4426
4427	#[simd_test(enable = "avx")]
4428	unsafe fn test_mm256_setzero_si256() {
4429	let r = _mm256_setzero_si256();
4430	assert_eq_m256i(r, _mm256_set1_epi8(`0`));
4431	}
4432
4433	#[simd_test(enable = "avx")]
4434	unsafe fn test_mm256_set_pd() {
4435	let r = _mm256_set_pd(`1.`, `2.`, `3.`, `4.`);
4436	assert_eq_m256d(r, _mm256_setr_pd(`4.`, `3.`, `2.`, `1.`));
4437	}
4438
4439	#[simd_test(enable = "avx")]
4440	unsafe fn test_mm256_set_ps() {
4441	let r = _mm256_set_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4442	assert_eq_m256(r, _mm256_setr_ps(`8.`, `7.`, `6.`, `5.`, `4.`, `3.`, `2.`, `1.`));
4443	}
4444
4445	#[simd_test(enable = "avx")]
4446	unsafe fn test_mm256_set_epi8() {
4447	#[rustfmt::skip]
4448	let r = _mm256_set_epi8(
4449	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4450	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4451	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4452	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
4453	);
4454	#[rustfmt::skip]
4455	let e = _mm256_setr_epi8(
4456	`32`, `31`, `30`, `29`, `28`, `27`, `26`, `25`,
4457	`24`, `23`, `22`, `21`, `20`, `19`, `18`, `17`,
4458	`16`, `15`, `14`, `13`, `12`, `11`, `10`, `9`,
4459	`8`, `7`, `6`, `5`, `4`, `3`, `2`, `1`
4460	);
4461	assert_eq_m256i(r, e);
4462	}
4463
4464	#[simd_test(enable = "avx")]
4465	unsafe fn test_mm256_set_epi16() {
4466	#[rustfmt::skip]
4467	let r = _mm256_set_epi16(
4468	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4469	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4470	);
4471	#[rustfmt::skip]
4472	let e = _mm256_setr_epi16(
4473	`16`, `15`, `14`, `13`, `12`, `11`, `10`, `9`, `8`,
4474	`7`, `6`, `5`, `4`, `3`, `2`, `1`,
4475	);
4476	assert_eq_m256i(r, e);
4477	}
4478
4479	#[simd_test(enable = "avx")]
4480	unsafe fn test_mm256_set_epi32() {
4481	let r = _mm256_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
4482	assert_eq_m256i(r, _mm256_setr_epi32(`8`, `7`, `6`, `5`, `4`, `3`, `2`, `1`));
4483	}
4484
4485	#[simd_test(enable = "avx")]
4486	unsafe fn test_mm256_set_epi64x() {
4487	let r = _mm256_set_epi64x(`1`, `2`, `3`, `4`);
4488	assert_eq_m256i(r, _mm256_setr_epi64x(`4`, `3`, `2`, `1`));
4489	}
4490
4491	#[simd_test(enable = "avx")]
4492	unsafe fn test_mm256_setr_pd() {
4493	let r = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4494	assert_eq_m256d(r, _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`));
4495	}
4496
4497	#[simd_test(enable = "avx")]
4498	unsafe fn test_mm256_setr_ps() {
4499	let r = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4500	assert_eq_m256(r, _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`));
4501	}
4502
4503	#[simd_test(enable = "avx")]
4504	unsafe fn test_mm256_setr_epi8() {
4505	#[rustfmt::skip]
4506	let r = _mm256_setr_epi8(
4507	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4508	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4509	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4510	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
4511	);
4512	#[rustfmt::skip]
4513	let e = _mm256_setr_epi8(
4514	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4515	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4516	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4517	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`
4518	);
4519
4520	assert_eq_m256i(r, e);
4521	}
4522
4523	#[simd_test(enable = "avx")]
4524	unsafe fn test_mm256_setr_epi16() {
4525	#[rustfmt::skip]
4526	let r = _mm256_setr_epi16(
4527	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4528	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4529	);
4530	#[rustfmt::skip]
4531	let e = _mm256_setr_epi16(
4532	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4533	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4534	);
4535	assert_eq_m256i(r, e);
4536	}
4537
4538	#[simd_test(enable = "avx")]
4539	unsafe fn test_mm256_setr_epi32() {
4540	let r = _mm256_setr_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
4541	assert_eq_m256i(r, _mm256_setr_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`));
4542	}
4543
4544	#[simd_test(enable = "avx")]
4545	unsafe fn test_mm256_setr_epi64x() {
4546	let r = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
4547	assert_eq_m256i(r, _mm256_setr_epi64x(`1`, `2`, `3`, `4`));
4548	}
4549
4550	#[simd_test(enable = "avx")]
4551	unsafe fn test_mm256_set1_pd() {
4552	let r = _mm256_set1_pd(`1.`);
4553	assert_eq_m256d(r, _mm256_set1_pd(`1.`));
4554	}
4555
4556	#[simd_test(enable = "avx")]
4557	unsafe fn test_mm256_set1_ps() {
4558	let r = _mm256_set1_ps(`1.`);
4559	assert_eq_m256(r, _mm256_set1_ps(`1.`));
4560	}
4561
4562	#[simd_test(enable = "avx")]
4563	unsafe fn test_mm256_set1_epi8() {
4564	let r = _mm256_set1_epi8(`1`);
4565	assert_eq_m256i(r, _mm256_set1_epi8(`1`));
4566	}
4567
4568	#[simd_test(enable = "avx")]
4569	unsafe fn test_mm256_set1_epi16() {
4570	let r = _mm256_set1_epi16(`1`);
4571	assert_eq_m256i(r, _mm256_set1_epi16(`1`));
4572	}
4573
4574	#[simd_test(enable = "avx")]
4575	unsafe fn test_mm256_set1_epi32() {
4576	let r = _mm256_set1_epi32(`1`);
4577	assert_eq_m256i(r, _mm256_set1_epi32(`1`));
4578	}
4579
4580	#[simd_test(enable = "avx")]
4581	unsafe fn test_mm256_set1_epi64x() {
4582	let r = _mm256_set1_epi64x(`1`);
4583	assert_eq_m256i(r, _mm256_set1_epi64x(`1`));
4584	}
4585
4586	#[simd_test(enable = "avx")]
4587	unsafe fn test_mm256_castpd_ps() {
4588	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4589	let r = _mm256_castpd_ps(a);
4590	let e = _mm256_setr_ps(`0.`, `1.875`, `0.`, `2.`, `0.`, `2.125`, `0.`, `2.25`);
4591	assert_eq_m256(r, e);
4592	}
4593
4594	#[simd_test(enable = "avx")]
4595	unsafe fn test_mm256_castps_pd() {
4596	let a = _mm256_setr_ps(`0.`, `1.875`, `0.`, `2.`, `0.`, `2.125`, `0.`, `2.25`);
4597	let r = _mm256_castps_pd(a);
4598	let e = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4599	assert_eq_m256d(r, e);
4600	}
4601
4602	#[simd_test(enable = "avx")]
4603	unsafe fn test_mm256_castps_si256() {
4604	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4605	let r = _mm256_castps_si256(a);
4606	#[rustfmt::skip]
4607	let e = _mm256_setr_epi8(
4608	`0`, `0`, `-128`, `63`, `0`, `0`, `0`, `64`,
4609	`0`, `0`, `64`, `64`, `0`, `0`, `-128`, `64`,
4610	`0`, `0`, `-96`, `64`, `0`, `0`, `-64`, `64`,
4611	`0`, `0`, `-32`, `64`, `0`, `0`, `0`, `65`,
4612	);
4613	assert_eq_m256i(r, e);
4614	}
4615
4616	#[simd_test(enable = "avx")]
4617	unsafe fn test_mm256_castsi256_ps() {
4618	#[rustfmt::skip]
4619	let a = _mm256_setr_epi8(
4620	`0`, `0`, `-128`, `63`, `0`, `0`, `0`, `64`,
4621	`0`, `0`, `64`, `64`, `0`, `0`, `-128`, `64`,
4622	`0`, `0`, `-96`, `64`, `0`, `0`, `-64`, `64`,
4623	`0`, `0`, `-32`, `64`, `0`, `0`, `0`, `65`,
4624	);
4625	let r = _mm256_castsi256_ps(a);
4626	let e = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4627	assert_eq_m256(r, e);
4628	}
4629
4630	#[simd_test(enable = "avx")]
4631	unsafe fn test_mm256_castpd_si256() {
4632	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4633	let r = _mm256_castpd_si256(a);
4634	assert_eq_m256d(transmute(r), a);
4635	}
4636
4637	#[simd_test(enable = "avx")]
4638	unsafe fn test_mm256_castsi256_pd() {
4639	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
4640	let r = _mm256_castsi256_pd(a);
4641	assert_eq_m256d(r, transmute(a));
4642	}
4643
4644	#[simd_test(enable = "avx")]
4645	unsafe fn test_mm256_castps256_ps128() {
4646	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4647	let r = _mm256_castps256_ps128(a);
4648	assert_eq_m128(r, _mm_setr_ps(`1.`, `2.`, `3.`, `4.`));
4649	}
4650
4651	#[simd_test(enable = "avx")]
4652	unsafe fn test_mm256_castpd256_pd128() {
4653	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4654	let r = _mm256_castpd256_pd128(a);
4655	assert_eq_m128d(r, _mm_setr_pd(`1.`, `2.`));
4656	}
4657
4658	#[simd_test(enable = "avx")]
4659	unsafe fn test_mm256_castsi256_si128() {
4660	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
4661	let r = _mm256_castsi256_si128(a);
4662	assert_eq_m128i(r, _mm_setr_epi64x(`1`, `2`));
4663	}
4664
4665	#[simd_test(enable = "avx")]
4666	unsafe fn test_mm256_zextps128_ps256() {
4667	let a = _mm_setr_ps(`1.`, `2.`, `3.`, `4.`);
4668	let r = _mm256_zextps128_ps256(a);
4669	let e = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `0.`, `0.`, `0.`, `0.`);
4670	assert_eq_m256(r, e);
4671	}
4672
4673	#[simd_test(enable = "avx")]
4674	unsafe fn test_mm256_zextsi128_si256() {
4675	let a = _mm_setr_epi64x(`1`, `2`);
4676	let r = _mm256_zextsi128_si256(a);
4677	let e = _mm256_setr_epi64x(`1`, `2`, `0`, `0`);
4678	assert_eq_m256i(r, e);
4679	}
4680
4681	#[simd_test(enable = "avx")]
4682	unsafe fn test_mm256_zextpd128_pd256() {
4683	let a = _mm_setr_pd(`1.`, `2.`);
4684	let r = _mm256_zextpd128_pd256(a);
4685	let e = _mm256_setr_pd(`1.`, `2.`, `0.`, `0.`);
4686	assert_eq_m256d(r, e);
4687	}
4688
4689	#[simd_test(enable = "avx")]
4690	unsafe fn test_mm256_set_m128() {
4691	let hi = _mm_setr_ps(`5.`, `6.`, `7.`, `8.`);
4692	let lo = _mm_setr_ps(`1.`, `2.`, `3.`, `4.`);
4693	let r = _mm256_set_m128(hi, lo);
4694	let e = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4695	assert_eq_m256(r, e);
4696	}
4697
4698	#[simd_test(enable = "avx")]
4699	unsafe fn test_mm256_set_m128d() {
4700	let hi = _mm_setr_pd(`3.`, `4.`);
4701	let lo = _mm_setr_pd(`1.`, `2.`);
4702	let r = _mm256_set_m128d(hi, lo);
4703	let e = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4704	assert_eq_m256d(r, e);
4705	}
4706
4707	#[simd_test(enable = "avx")]
4708	unsafe fn test_mm256_set_m128i() {
4709	#[rustfmt::skip]
4710	let hi = _mm_setr_epi8(
4711	`17`, `18`, `19`, `20`,
4712	`21`, `22`, `23`, `24`,
4713	`25`, `26`, `27`, `28`,
4714	`29`, `30`, `31`, `32`,
4715	);
4716	#[rustfmt::skip]
4717	let lo = _mm_setr_epi8(
4718	`1`, `2`, `3`, `4`,
4719	`5`, `6`, `7`, `8`,
4720	`9`, `10`, `11`, `12`,
4721	`13`, `14`, `15`, `16`,
4722	);
4723	let r = _mm256_set_m128i(hi, lo);
4724	#[rustfmt::skip]
4725	let e = _mm256_setr_epi8(
4726	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4727	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4728	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4729	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
4730	);
4731	assert_eq_m256i(r, e);
4732	}
4733
4734	#[simd_test(enable = "avx")]
4735	unsafe fn test_mm256_setr_m128() {
4736	let lo = _mm_setr_ps(`1.`, `2.`, `3.`, `4.`);
4737	let hi = _mm_setr_ps(`5.`, `6.`, `7.`, `8.`);
4738	let r = _mm256_setr_m128(lo, hi);
4739	let e = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4740	assert_eq_m256(r, e);
4741	}
4742
4743	#[simd_test(enable = "avx")]
4744	unsafe fn test_mm256_setr_m128d() {
4745	let lo = _mm_setr_pd(`1.`, `2.`);
4746	let hi = _mm_setr_pd(`3.`, `4.`);
4747	let r = _mm256_setr_m128d(lo, hi);
4748	let e = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4749	assert_eq_m256d(r, e);
4750	}
4751
4752	#[simd_test(enable = "avx")]
4753	unsafe fn test_mm256_setr_m128i() {
4754	#[rustfmt::skip]
4755	let lo = _mm_setr_epi8(
4756	`1`, `2`, `3`, `4`,
4757	`5`, `6`, `7`, `8`,
4758	`9`, `10`, `11`, `12`,
4759	`13`, `14`, `15`, `16`,
4760	);
4761	#[rustfmt::skip]
4762	let hi = _mm_setr_epi8(
4763	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4764	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
4765	);
4766	let r = _mm256_setr_m128i(lo, hi);
4767	#[rustfmt::skip]
4768	let e = _mm256_setr_epi8(
4769	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4770	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4771	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4772	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
4773	);
4774	assert_eq_m256i(r, e);
4775	}
4776
4777	#[simd_test(enable = "avx")]
4778	unsafe fn test_mm256_loadu2_m128() {
4779	let hi = &[`5.`, `6.`, `7.`, `8.`];
4780	let hiaddr = hi.as_ptr();
4781	let lo = &[`1.`, `2.`, `3.`, `4.`];
4782	let loaddr = lo.as_ptr();
4783	let r = _mm256_loadu2_m128(hiaddr, loaddr);
4784	let e = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4785	assert_eq_m256(r, e);
4786	}
4787
4788	#[simd_test(enable = "avx")]
4789	unsafe fn test_mm256_loadu2_m128d() {
4790	let hi = &[`3.`, `4.`];
4791	let hiaddr = hi.as_ptr();
4792	let lo = &[`1.`, `2.`];
4793	let loaddr = lo.as_ptr();
4794	let r = _mm256_loadu2_m128d(hiaddr, loaddr);
4795	let e = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4796	assert_eq_m256d(r, e);
4797	}
4798
4799	#[simd_test(enable = "avx")]
4800	unsafe fn test_mm256_loadu2_m128i() {
4801	#[rustfmt::skip]
4802	let hi = _mm_setr_epi8(
4803	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4804	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
4805	);
4806	#[rustfmt::skip]
4807	let lo = _mm_setr_epi8(
4808	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4809	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4810	);
4811	let r = _mm256_loadu2_m128i(ptr::addr_of!(hi) as *const _, ptr::addr_of!(lo) as *const _);
4812	#[rustfmt::skip]
4813	let e = _mm256_setr_epi8(
4814	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4815	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4816	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4817	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
4818	);
4819	assert_eq_m256i(r, e);
4820	}
4821
4822	#[simd_test(enable = "avx")]
4823	unsafe fn test_mm256_storeu2_m128() {
4824	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4825	let mut hi = _mm_undefined_ps();
4826	let mut lo = _mm_undefined_ps();
4827	_mm256_storeu2_m128(
4828	ptr::addr_of_mut!(hi) as *mut f32,
4829	ptr::addr_of_mut!(lo) as *mut f32,
4830	a,
4831	);
4832	assert_eq_m128(hi, _mm_setr_ps(`5.`, `6.`, `7.`, `8.`));
4833	assert_eq_m128(lo, _mm_setr_ps(`1.`, `2.`, `3.`, `4.`));
4834	}
4835
4836	#[simd_test(enable = "avx")]
4837	unsafe fn test_mm256_storeu2_m128d() {
4838	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
4839	let mut hi = _mm_undefined_pd();
4840	let mut lo = _mm_undefined_pd();
4841	_mm256_storeu2_m128d(
4842	ptr::addr_of_mut!(hi) as *mut f64,
4843	ptr::addr_of_mut!(lo) as *mut f64,
4844	a,
4845	);
4846	assert_eq_m128d(hi, _mm_setr_pd(`3.`, `4.`));
4847	assert_eq_m128d(lo, _mm_setr_pd(`1.`, `2.`));
4848	}
4849
4850	#[simd_test(enable = "avx")]
4851	unsafe fn test_mm256_storeu2_m128i() {
4852	#[rustfmt::skip]
4853	let a = _mm256_setr_epi8(
4854	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4855	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
4856	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4857	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
4858	);
4859	let mut hi = _mm_undefined_si128();
4860	let mut lo = _mm_undefined_si128();
4861	_mm256_storeu2_m128i(ptr::addr_of_mut!(hi), ptr::addr_of_mut!(lo), a);
4862	#[rustfmt::skip]
4863	let e_hi = _mm_setr_epi8(
4864	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
4865	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`
4866	);
4867	#[rustfmt::skip]
4868	let e_lo = _mm_setr_epi8(
4869	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
4870	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`
4871	);
4872
4873	assert_eq_m128i(hi, e_hi);
4874	assert_eq_m128i(lo, e_lo);
4875	}
4876
4877	#[simd_test(enable = "avx")]
4878	unsafe fn test_mm256_cvtss_f32() {
4879	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
4880	let r = _mm256_cvtss_f32(a);
4881	assert_eq!(r, `1.`);
4882	}
4883	}
4884