avx2.rs source code [crates/core_arch/src/x86/avx2.rs]

1	//! Advanced Vector Extensions 2 (AVX2)
2	//!
3	//! AVX2 expands most AVX commands to 256-bit wide vector registers and
4	//! adds [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate).
5	//!
6	//! The references are:
7	//!
8	//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
9	//! Instruction Set Reference, A-Z][intel64_ref].
10	//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
11	//! System Instructions][amd64_ref].
12	//!
13	//! Wikipedia's [AVX][wiki_avx] and [FMA][wiki_fma] pages provide a quick
14	//! overview of the instructions available.
15	//!
16	//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
17	//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
18	//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
19	//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
20
21	use crate::core_arch::{simd::, x86::};
22	use crate::intrinsics::simd::*;
23
24	#[cfg(test)]
25	use stdarch_test::assert_instr;
26
27	/// Computes the absolute values of packed 32-bit integers in `a`.
28	///
29	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi32)
30	#[inline]
31	#[target_feature(enable = "avx2")]
32	#[cfg_attr(test, assert_instr(vpabsd))]
33	#[stable(feature = "simd_x86", since = "1.27.0")]
34	pub unsafe fn _mm256_abs_epi32(a: __m256i) -> __m256i {
35	let a: i32x8 = a.as_i32x8();
36	let zero: i32x8 = i32x8::splat(`0`);
37	let r: i32x8 = simd_select::<m32x8, _>(mask:simd_lt(a, zero), if_true:simd_neg(a), if_false:a);
38	transmute(src:r)
39	}
40
41	/// Computes the absolute values of packed 16-bit integers in `a`.
42	///
43	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi16)
44	#[inline]
45	#[target_feature(enable = "avx2")]
46	#[cfg_attr(test, assert_instr(vpabsw))]
47	#[stable(feature = "simd_x86", since = "1.27.0")]
48	pub unsafe fn _mm256_abs_epi16(a: __m256i) -> __m256i {
49	let a: i16x16 = a.as_i16x16();
50	let zero: i16x16 = i16x16::splat(`0`);
51	let r: i16x16 = simd_select::<m16x16, _>(mask:simd_lt(a, zero), if_true:simd_neg(a), if_false:a);
52	transmute(src:r)
53	}
54
55	/// Computes the absolute values of packed 8-bit integers in `a`.
56	///
57	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi8)
58	#[inline]
59	#[target_feature(enable = "avx2")]
60	#[cfg_attr(test, assert_instr(vpabsb))]
61	#[stable(feature = "simd_x86", since = "1.27.0")]
62	pub unsafe fn _mm256_abs_epi8(a: __m256i) -> __m256i {
63	let a: i8x32 = a.as_i8x32();
64	let zero: i8x32 = i8x32::splat(`0`);
65	let r: i8x32 = simd_select::<m8x32, _>(mask:simd_lt(a, zero), if_true:simd_neg(a), if_false:a);
66	transmute(src:r)
67	}
68
69	/// Adds packed 64-bit integers in `a` and `b`.
70	///
71	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi64)
72	#[inline]
73	#[target_feature(enable = "avx2")]
74	#[cfg_attr(test, assert_instr(vpaddq))]
75	#[stable(feature = "simd_x86", since = "1.27.0")]
76	pub unsafe fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
77	transmute(src:simd_add(x:a.as_i64x4(), y:b.as_i64x4()))
78	}
79
80	/// Adds packed 32-bit integers in `a` and `b`.
81	///
82	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi32)
83	#[inline]
84	#[target_feature(enable = "avx2")]
85	#[cfg_attr(test, assert_instr(vpaddd))]
86	#[stable(feature = "simd_x86", since = "1.27.0")]
87	pub unsafe fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
88	transmute(src:simd_add(x:a.as_i32x8(), y:b.as_i32x8()))
89	}
90
91	/// Adds packed 16-bit integers in `a` and `b`.
92	///
93	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi16)
94	#[inline]
95	#[target_feature(enable = "avx2")]
96	#[cfg_attr(test, assert_instr(vpaddw))]
97	#[stable(feature = "simd_x86", since = "1.27.0")]
98	pub unsafe fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
99	transmute(src:simd_add(x:a.as_i16x16(), y:b.as_i16x16()))
100	}
101
102	/// Adds packed 8-bit integers in `a` and `b`.
103	///
104	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi8)
105	#[inline]
106	#[target_feature(enable = "avx2")]
107	#[cfg_attr(test, assert_instr(vpaddb))]
108	#[stable(feature = "simd_x86", since = "1.27.0")]
109	pub unsafe fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
110	transmute(src:simd_add(x:a.as_i8x32(), y:b.as_i8x32()))
111	}
112
113	/// Adds packed 8-bit integers in `a` and `b` using saturation.
114	///
115	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epi8)
116	#[inline]
117	#[target_feature(enable = "avx2")]
118	#[cfg_attr(test, assert_instr(vpaddsb))]
119	#[stable(feature = "simd_x86", since = "1.27.0")]
120	pub unsafe fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
121	transmute(src:simd_saturating_add(x:a.as_i8x32(), y:b.as_i8x32()))
122	}
123
124	/// Adds packed 16-bit integers in `a` and `b` using saturation.
125	///
126	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epi16)
127	#[inline]
128	#[target_feature(enable = "avx2")]
129	#[cfg_attr(test, assert_instr(vpaddsw))]
130	#[stable(feature = "simd_x86", since = "1.27.0")]
131	pub unsafe fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
132	transmute(src:simd_saturating_add(x:a.as_i16x16(), y:b.as_i16x16()))
133	}
134
135	/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
136	///
137	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epu8)
138	#[inline]
139	#[target_feature(enable = "avx2")]
140	#[cfg_attr(test, assert_instr(vpaddusb))]
141	#[stable(feature = "simd_x86", since = "1.27.0")]
142	pub unsafe fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
143	transmute(src:simd_saturating_add(x:a.as_u8x32(), y:b.as_u8x32()))
144	}
145
146	/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
147	///
148	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epu16)
149	#[inline]
150	#[target_feature(enable = "avx2")]
151	#[cfg_attr(test, assert_instr(vpaddusw))]
152	#[stable(feature = "simd_x86", since = "1.27.0")]
153	pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
154	transmute(src:simd_saturating_add(x:a.as_u16x16(), y:b.as_u16x16()))
155	}
156
157	/// Concatenates pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary
158	/// result, shifts the result right by `n` bytes, and returns the low 16 bytes.
159	///
160	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi8)
161	#[inline]
162	#[target_feature(enable = "avx2")]
163	#[cfg_attr(test, assert_instr(vpalignr, IMM8 = `7`))]
164	#[rustc_legacy_const_generics(`2`)]
165	#[stable(feature = "simd_x86", since = "1.27.0")]
166	pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
167	static_assert_uimm_bits!(IMM8, `8`);
168	// If palignr is shifting the pair of vectors more than the size of two
169	// lanes, emit zero.
170	if IMM8 > `32` {
171	return _mm256_set1_epi8(`0`);
172	}
173	// If palignr is shifting the pair of input vectors more than one lane,
174	// but less than two lanes, convert to shifting in zeroes.
175	let (a, b) = if IMM8 > `16` {
176	(_mm256_set1_epi8(`0`), a)
177	} else {
178	(a, b)
179	};
180
181	let a = a.as_i8x32();
182	let b = b.as_i8x32();
183
184	let r: i8x32 = match IMM8 % `16` {
185	`0` => simd_shuffle!(
186	b,
187	a,
188	[
189	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`,
190	`23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
191	],
192	),
193	`1` => simd_shuffle!(
194	b,
195	a,
196	[
197	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `17`, `18`, `19`, `20`, `21`, `22`, `23`,
198	`24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`, `48`,
199	],
200	),
201	`2` => simd_shuffle!(
202	b,
203	a,
204	[
205	`2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `33`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
206	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `48`, `49`,
207	],
208	),
209	`3` => simd_shuffle!(
210	b,
211	a,
212	[
213	`3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `33`, `34`, `19`, `20`, `21`, `22`, `23`, `24`,
214	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `48`, `49`, `50`,
215	],
216	),
217	`4` => simd_shuffle!(
218	b,
219	a,
220	[
221	`4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `33`, `34`, `35`, `20`, `21`, `22`, `23`, `24`, `25`,
222	`26`, `27`, `28`, `29`, `30`, `31`, `48`, `49`, `50`, `51`,
223	],
224	),
225	`5` => simd_shuffle!(
226	b,
227	a,
228	[
229	`5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `33`, `34`, `35`, `36`, `21`, `22`, `23`, `24`, `25`, `26`,
230	`27`, `28`, `29`, `30`, `31`, `48`, `49`, `50`, `51`, `52`,
231	],
232	),
233	`6` => simd_shuffle!(
234	b,
235	a,
236	[
237	`6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `33`, `34`, `35`, `36`, `37`, `22`, `23`, `24`, `25`, `26`, `27`,
238	`28`, `29`, `30`, `31`, `48`, `49`, `50`, `51`, `52`, `53`,
239	],
240	),
241	`7` => simd_shuffle!(
242	b,
243	a,
244	[
245	`7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `33`, `34`, `35`, `36`, `37`, `38`, `23`, `24`, `25`, `26`, `27`,
246	`28`, `29`, `30`, `31`, `48`, `49`, `50`, `51`, `52`, `53`, `54`,
247	],
248	),
249	`8` => simd_shuffle!(
250	b,
251	a,
252	[
253	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `24`, `25`, `26`, `27`, `28`,
254	`29`, `30`, `31`, `48`, `49`, `50`, `51`, `52`, `53`, `54`, `55`,
255	],
256	),
257	`9` => simd_shuffle!(
258	b,
259	a,
260	[
261	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `25`, `26`, `27`, `28`, `29`,
262	`30`, `31`, `48`, `49`, `50`, `51`, `52`, `53`, `54`, `55`, `56`,
263	],
264	),
265	`10` => simd_shuffle!(
266	b,
267	a,
268	[
269	`10`, `11`, `12`, `13`, `14`, `15`, `32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`, `26`, `27`, `28`, `29`, `30`,
270	`31`, `48`, `49`, `50`, `51`, `52`, `53`, `54`, `55`, `56`, `57`,
271	],
272	),
273	`11` => simd_shuffle!(
274	b,
275	a,
276	[
277	`11`, `12`, `13`, `14`, `15`, `32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`, `42`, `27`, `28`, `29`, `30`, `31`,
278	`48`, `49`, `50`, `51`, `52`, `53`, `54`, `55`, `56`, `57`, `58`,
279	],
280	),
281	`12` => simd_shuffle!(
282	b,
283	a,
284	[
285	`12`, `13`, `14`, `15`, `32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`, `42`, `43`, `28`, `29`, `30`, `31`, `48`,
286	`49`, `50`, `51`, `52`, `53`, `54`, `55`, `56`, `57`, `58`, `59`,
287	],
288	),
289	`13` => simd_shuffle!(
290	b,
291	a,
292	[
293	`13`, `14`, `15`, `32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`, `42`, `43`, `44`, `29`, `30`, `31`, `48`, `49`,
294	`50`, `51`, `52`, `53`, `54`, `55`, `56`, `57`, `58`, `59`, `60`,
295	],
296	),
297	`14` => simd_shuffle!(
298	b,
299	a,
300	[
301	`14`, `15`, `32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`, `42`, `43`, `44`, `45`, `30`, `31`, `48`, `49`, `50`,
302	`51`, `52`, `53`, `54`, `55`, `56`, `57`, `58`, `59`, `60`, `61`,
303	],
304	),
305	`15` => simd_shuffle!(
306	b,
307	a,
308	[
309	`15`, `32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`, `42`, `43`, `44`, `45`, `46`, `31`, `48`, `49`, `50`, `51`,
310	`52`, `53`, `54`, `55`, `56`, `57`, `58`, `59`, `60`, `61`, `62`,
311	],
312	),
313	_ => b,
314	};
315	transmute(r)
316	}
317
318	/// Computes the bitwise AND of 256 bits (representing integer data)
319	/// in `a` and `b`.
320	///
321	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_si256)
322	#[inline]
323	#[target_feature(enable = "avx2")]
324	#[cfg_attr(test, assert_instr(vandps))]
325	#[stable(feature = "simd_x86", since = "1.27.0")]
326	pub unsafe fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
327	transmute(src:simd_and(x:a.as_i64x4(), y:b.as_i64x4()))
328	}
329
330	/// Computes the bitwise NOT of 256 bits (representing integer data)
331	/// in `a` and then AND with `b`.
332	///
333	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_si256)
334	#[inline]
335	#[target_feature(enable = "avx2")]
336	#[cfg_attr(test, assert_instr(vandnps))]
337	#[stable(feature = "simd_x86", since = "1.27.0")]
338	pub unsafe fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
339	let all_ones: __m256i = _mm256_set1_epi8(`-1`);
340	transmute(src:simd_and(
341	x:simd_xor(a.as_i64x4(), all_ones.as_i64x4()),
342	y:b.as_i64x4(),
343	))
344	}
345
346	/// Averages packed unsigned 16-bit integers in `a` and `b`.
347	///
348	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_avg_epu16)
349	#[inline]
350	#[target_feature(enable = "avx2")]
351	#[cfg_attr(test, assert_instr(vpavgw))]
352	#[stable(feature = "simd_x86", since = "1.27.0")]
353	pub unsafe fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
354	let a: u32x16 = simd_cast::<_, u32x16>(a.as_u16x16());
355	let b: u32x16 = simd_cast::<_, u32x16>(b.as_u16x16());
356	let r: u32x16 = simd_shr(lhs:simd_add(simd_add(a, b), u32x16::splat(`1`)), rhs:u32x16::splat(`1`));
357	transmute(src:simd_cast::<_, u16x16>(r))
358	}
359
360	/// Averages packed unsigned 8-bit integers in `a` and `b`.
361	///
362	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_avg_epu8)
363	#[inline]
364	#[target_feature(enable = "avx2")]
365	#[cfg_attr(test, assert_instr(vpavgb))]
366	#[stable(feature = "simd_x86", since = "1.27.0")]
367	pub unsafe fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
368	let a: u16x32 = simd_cast::<_, u16x32>(a.as_u8x32());
369	let b: u16x32 = simd_cast::<_, u16x32>(b.as_u8x32());
370	let r: u16x32 = simd_shr(lhs:simd_add(simd_add(a, b), u16x32::splat(`1`)), rhs:u16x32::splat(`1`));
371	transmute(src:simd_cast::<_, u8x32>(r))
372	}
373
374	/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM4`.
375	///
376	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_epi32)
377	#[inline]
378	#[target_feature(enable = "avx2")]
379	#[cfg_attr(test, assert_instr(vblendps, IMM4 = `9`))]
380	#[rustc_legacy_const_generics(`2`)]
381	#[stable(feature = "simd_x86", since = "1.27.0")]
382	pub unsafe fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
383	static_assert_uimm_bits!(IMM4, `4`);
384	let a: i32x4 = a.as_i32x4();
385	let b: i32x4 = b.as_i32x4();
386	let r: i32x4 = simd_shuffle!(
387	a,
388	b,
389	[
390	[`0`, `4`, `0`, `4`][IMM4 as usize & `0b11`],
391	[`1`, `1`, `5`, `5`][IMM4 as usize & `0b11`],
392	[`2`, `6`, `2`, `6`][(IMM4 as usize >> `2`) & `0b11`],
393	[`3`, `3`, `7`, `7`][(IMM4 as usize >> `2`) & `0b11`],
394	],
395	);
396	transmute(src:r)
397	}
398
399	/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM8`.
400	///
401	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_epi32)
402	#[inline]
403	#[target_feature(enable = "avx2")]
404	#[cfg_attr(test, assert_instr(vblendps, IMM8 = `9`))]
405	#[rustc_legacy_const_generics(`2`)]
406	#[stable(feature = "simd_x86", since = "1.27.0")]
407	pub unsafe fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
408	static_assert_uimm_bits!(IMM8, `8`);
409	let a: i32x8 = a.as_i32x8();
410	let b: i32x8 = b.as_i32x8();
411	let r: i32x8 = simd_shuffle!(
412	a,
413	b,
414	[
415	[`0`, `8`, `0`, `8`][IMM8 as usize & `0b11`],
416	[`1`, `1`, `9`, `9`][IMM8 as usize & `0b11`],
417	[`2`, `10`, `2`, `10`][(IMM8 as usize >> `2`) & `0b11`],
418	[`3`, `3`, `11`, `11`][(IMM8 as usize >> `2`) & `0b11`],
419	[`4`, `12`, `4`, `12`][(IMM8 as usize >> `4`) & `0b11`],
420	[`5`, `5`, `13`, `13`][(IMM8 as usize >> `4`) & `0b11`],
421	[`6`, `14`, `6`, `14`][(IMM8 as usize >> `6`) & `0b11`],
422	[`7`, `7`, `15`, `15`][(IMM8 as usize >> `6`) & `0b11`],
423	],
424	);
425	transmute(src:r)
426	}
427
428	/// Blends packed 16-bit integers from `a` and `b` using control mask `IMM8`.
429	///
430	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_epi16)
431	#[inline]
432	#[target_feature(enable = "avx2")]
433	#[cfg_attr(test, assert_instr(vpblendw, IMM8 = `9`))]
434	#[rustc_legacy_const_generics(`2`)]
435	#[stable(feature = "simd_x86", since = "1.27.0")]
436	pub unsafe fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
437	static_assert_uimm_bits!(IMM8, `8`);
438	let a = a.as_i16x16();
439	let b = b.as_i16x16();
440
441	let r: i16x16 = simd_shuffle!(
442	a,
443	b,
444	[
445	[`0`, `16`, `0`, `16`][IMM8 as usize & `0b11`],
446	[`1`, `1`, `17`, `17`][IMM8 as usize & `0b11`],
447	[`2`, `18`, `2`, `18`][(IMM8 as usize >> `2`) & `0b11`],
448	[`3`, `3`, `19`, `19`][(IMM8 as usize >> `2`) & `0b11`],
449	[`4`, `20`, `4`, `20`][(IMM8 as usize >> `4`) & `0b11`],
450	[`5`, `5`, `21`, `21`][(IMM8 as usize >> `4`) & `0b11`],
451	[`6`, `22`, `6`, `22`][(IMM8 as usize >> `6`) & `0b11`],
452	[`7`, `7`, `23`, `23`][(IMM8 as usize >> `6`) & `0b11`],
453	[`8`, `24`, `8`, `24`][IMM8 as usize & `0b11`],
454	[`9`, `9`, `25`, `25`][IMM8 as usize & `0b11`],
455	[`10`, `26`, `10`, `26`][(IMM8 as usize >> `2`) & `0b11`],
456	[`11`, `11`, `27`, `27`][(IMM8 as usize >> `2`) & `0b11`],
457	[`12`, `28`, `12`, `28`][(IMM8 as usize >> `4`) & `0b11`],
458	[`13`, `13`, `29`, `29`][(IMM8 as usize >> `4`) & `0b11`],
459	[`14`, `30`, `14`, `30`][(IMM8 as usize >> `6`) & `0b11`],
460	[`15`, `15`, `31`, `31`][(IMM8 as usize >> `6`) & `0b11`],
461	],
462	);
463	transmute(r)
464	}
465
466	/// Blends packed 8-bit integers from `a` and `b` using `mask`.
467	///
468	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_epi8)
469	#[inline]
470	#[target_feature(enable = "avx2")]
471	#[cfg_attr(test, assert_instr(vpblendvb))]
472	#[stable(feature = "simd_x86", since = "1.27.0")]
473	pub unsafe fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
474	let mask: i8x32 = simd_lt(x:mask.as_i8x32(), y:i8x32::splat(`0`));
475	transmute(src:simd_select(mask, if_true:b.as_i8x32(), if_false:a.as_i8x32()))
476	}
477
478	/// Broadcasts the low packed 8-bit integer from `a` to all elements of
479	/// the 128-bit returned value.
480	///
481	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastb_epi8)
482	#[inline]
483	#[target_feature(enable = "avx2")]
484	#[cfg_attr(test, assert_instr(vpbroadcastb))]
485	#[stable(feature = "simd_x86", since = "1.27.0")]
486	pub unsafe fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
487	let zero: __m128i = _mm_setzero_si128();
488	let ret: i8x16 = simd_shuffle!(a.as_i8x16(), zero.as_i8x16(), [`0_u32`; `16`]);
489	transmute::<i8x16, _>(src:ret)
490	}
491
492	/// Broadcasts the low packed 8-bit integer from `a` to all elements of
493	/// the 256-bit returned value.
494	///
495	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastb_epi8)
496	#[inline]
497	#[target_feature(enable = "avx2")]
498	#[cfg_attr(test, assert_instr(vpbroadcastb))]
499	#[stable(feature = "simd_x86", since = "1.27.0")]
500	pub unsafe fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
501	let zero: __m128i = _mm_setzero_si128();
502	let ret: i8x32 = simd_shuffle!(a.as_i8x16(), zero.as_i8x16(), [`0_u32`; `32`]);
503	transmute::<i8x32, _>(src:ret)
504	}
505
506	// N.B., `simd_shuffle4` with integer data types for `a` and `b` is
507	// often compiled to `vbroadcastss`.
508	/// Broadcasts the low packed 32-bit integer from `a` to all elements of
509	/// the 128-bit returned value.
510	///
511	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastd_epi32)
512	#[inline]
513	#[target_feature(enable = "avx2")]
514	#[cfg_attr(test, assert_instr(vbroadcastss))]
515	#[stable(feature = "simd_x86", since = "1.27.0")]
516	pub unsafe fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
517	let zero: __m128i = _mm_setzero_si128();
518	let ret: i32x4 = simd_shuffle!(a.as_i32x4(), zero.as_i32x4(), [`0_u32`; `4`]);
519	transmute::<i32x4, _>(src:ret)
520	}
521
522	// N.B., `simd_shuffle4`` with integer data types for `a` and `b` is
523	// often compiled to `vbroadcastss`.
524	/// Broadcasts the low packed 32-bit integer from `a` to all elements of
525	/// the 256-bit returned value.
526	///
527	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastd_epi32)
528	#[inline]
529	#[target_feature(enable = "avx2")]
530	#[cfg_attr(test, assert_instr(vbroadcastss))]
531	#[stable(feature = "simd_x86", since = "1.27.0")]
532	pub unsafe fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
533	let zero: __m128i = _mm_setzero_si128();
534	let ret: i32x8 = simd_shuffle!(a.as_i32x4(), zero.as_i32x4(), [`0_u32`; `8`]);
535	transmute::<i32x8, _>(src:ret)
536	}
537
538	/// Broadcasts the low packed 64-bit integer from `a` to all elements of
539	/// the 128-bit returned value.
540	///
541	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastq_epi64)
542	#[inline]
543	#[target_feature(enable = "avx2")]
544	// Emits `vmovddup` instead of `vpbroadcastq`
545	// See https://github.com/rust-lang/stdarch/issues/791
546	#[cfg_attr(test, assert_instr(vmovddup))]
547	#[stable(feature = "simd_x86", since = "1.27.0")]
548	pub unsafe fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
549	let ret: i64x2 = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [`0_u32`; `2`]);
550	transmute::<i64x2, _>(src:ret)
551	}
552
553	/// Broadcasts the low packed 64-bit integer from `a` to all elements of
554	/// the 256-bit returned value.
555	///
556	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastq_epi64)
557	#[inline]
558	#[target_feature(enable = "avx2")]
559	#[cfg_attr(test, assert_instr(vbroadcastsd))]
560	#[stable(feature = "simd_x86", since = "1.27.0")]
561	pub unsafe fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
562	let ret: i64x4 = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [`0_u32`; `4`]);
563	transmute::<i64x4, _>(src:ret)
564	}
565
566	/// Broadcasts the low double-precision (64-bit) floating-point element
567	/// from `a` to all elements of the 128-bit returned value.
568	///
569	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastsd_pd)
570	#[inline]
571	#[target_feature(enable = "avx2")]
572	#[cfg_attr(test, assert_instr(vmovddup))]
573	#[stable(feature = "simd_x86", since = "1.27.0")]
574	pub unsafe fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
575	simd_shuffle!(a, _mm_setzero_pd(), [`0_u32`; `2`])
576	}
577
578	/// Broadcasts the low double-precision (64-bit) floating-point element
579	/// from `a` to all elements of the 256-bit returned value.
580	///
581	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastsd_pd)
582	#[inline]
583	#[target_feature(enable = "avx2")]
584	#[cfg_attr(test, assert_instr(vbroadcastsd))]
585	#[stable(feature = "simd_x86", since = "1.27.0")]
586	pub unsafe fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
587	simd_shuffle!(a, _mm_setzero_pd(), [`0_u32`; `4`])
588	}
589
590	// N.B., `broadcastsi128_si256` is often compiled to `vinsertf128` or
591	// `vbroadcastf128`.
592	/// Broadcasts 128 bits of integer data from a to all 128-bit lanes in
593	/// the 256-bit returned value.
594	///
595	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastsi128_si256)
596	#[inline]
597	#[target_feature(enable = "avx2")]
598	#[stable(feature = "simd_x86", since = "1.27.0")]
599	pub unsafe fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
600	let zero: __m128i = _mm_setzero_si128();
601	let ret: i64x4 = simd_shuffle!(a.as_i64x2(), zero.as_i64x2(), [`0`, `1`, `0`, `1`]);
602	transmute::<i64x4, _>(src:ret)
603	}
604
605	/// Broadcasts the low single-precision (32-bit) floating-point element
606	/// from `a` to all elements of the 128-bit returned value.
607	///
608	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastss_ps)
609	#[inline]
610	#[target_feature(enable = "avx2")]
611	#[cfg_attr(test, assert_instr(vbroadcastss))]
612	#[stable(feature = "simd_x86", since = "1.27.0")]
613	pub unsafe fn _mm_broadcastss_ps(a: __m128) -> __m128 {
614	simd_shuffle!(a, _mm_setzero_ps(), [`0_u32`; `4`])
615	}
616
617	/// Broadcasts the low single-precision (32-bit) floating-point element
618	/// from `a` to all elements of the 256-bit returned value.
619	///
620	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastss_ps)
621	#[inline]
622	#[target_feature(enable = "avx2")]
623	#[cfg_attr(test, assert_instr(vbroadcastss))]
624	#[stable(feature = "simd_x86", since = "1.27.0")]
625	pub unsafe fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
626	simd_shuffle!(a, _mm_setzero_ps(), [`0_u32`; `8`])
627	}
628
629	/// Broadcasts the low packed 16-bit integer from a to all elements of
630	/// the 128-bit returned value
631	///
632	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastw_epi16)
633	#[inline]
634	#[target_feature(enable = "avx2")]
635	#[cfg_attr(test, assert_instr(vpbroadcastw))]
636	#[stable(feature = "simd_x86", since = "1.27.0")]
637	pub unsafe fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
638	let zero: __m128i = _mm_setzero_si128();
639	let ret: i16x8 = simd_shuffle!(a.as_i16x8(), zero.as_i16x8(), [`0_u32`; `8`]);
640	transmute::<i16x8, _>(src:ret)
641	}
642
643	/// Broadcasts the low packed 16-bit integer from a to all elements of
644	/// the 256-bit returned value
645	///
646	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastw_epi16)
647	#[inline]
648	#[target_feature(enable = "avx2")]
649	#[cfg_attr(test, assert_instr(vpbroadcastw))]
650	#[stable(feature = "simd_x86", since = "1.27.0")]
651	pub unsafe fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
652	let zero: __m128i = _mm_setzero_si128();
653	let ret: i16x16 = simd_shuffle!(a.as_i16x8(), zero.as_i16x8(), [`0_u32`; `16`]);
654	transmute::<i16x16, _>(src:ret)
655	}
656
657	/// Compares packed 64-bit integers in `a` and `b` for equality.
658	///
659	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64)
660	#[inline]
661	#[target_feature(enable = "avx2")]
662	#[cfg_attr(test, assert_instr(vpcmpeqq))]
663	#[stable(feature = "simd_x86", since = "1.27.0")]
664	pub unsafe fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
665	transmute::<i64x4, _>(src:simd_eq(x:a.as_i64x4(), y:b.as_i64x4()))
666	}
667
668	/// Compares packed 32-bit integers in `a` and `b` for equality.
669	///
670	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32)
671	#[inline]
672	#[target_feature(enable = "avx2")]
673	#[cfg_attr(test, assert_instr(vpcmpeqd))]
674	#[stable(feature = "simd_x86", since = "1.27.0")]
675	pub unsafe fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
676	transmute::<i32x8, _>(src:simd_eq(x:a.as_i32x8(), y:b.as_i32x8()))
677	}
678
679	/// Compares packed 16-bit integers in `a` and `b` for equality.
680	///
681	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16)
682	#[inline]
683	#[target_feature(enable = "avx2")]
684	#[cfg_attr(test, assert_instr(vpcmpeqw))]
685	#[stable(feature = "simd_x86", since = "1.27.0")]
686	pub unsafe fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
687	transmute::<i16x16, _>(src:simd_eq(x:a.as_i16x16(), y:b.as_i16x16()))
688	}
689
690	/// Compares packed 8-bit integers in `a` and `b` for equality.
691	///
692	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8)
693	#[inline]
694	#[target_feature(enable = "avx2")]
695	#[cfg_attr(test, assert_instr(vpcmpeqb))]
696	#[stable(feature = "simd_x86", since = "1.27.0")]
697	pub unsafe fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
698	transmute::<i8x32, _>(src:simd_eq(x:a.as_i8x32(), y:b.as_i8x32()))
699	}
700
701	/// Compares packed 64-bit integers in `a` and `b` for greater-than.
702	///
703	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64)
704	#[inline]
705	#[target_feature(enable = "avx2")]
706	#[cfg_attr(test, assert_instr(vpcmpgtq))]
707	#[stable(feature = "simd_x86", since = "1.27.0")]
708	pub unsafe fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
709	transmute::<i64x4, _>(src:simd_gt(x:a.as_i64x4(), y:b.as_i64x4()))
710	}
711
712	/// Compares packed 32-bit integers in `a` and `b` for greater-than.
713	///
714	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32)
715	#[inline]
716	#[target_feature(enable = "avx2")]
717	#[cfg_attr(test, assert_instr(vpcmpgtd))]
718	#[stable(feature = "simd_x86", since = "1.27.0")]
719	pub unsafe fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
720	transmute::<i32x8, _>(src:simd_gt(x:a.as_i32x8(), y:b.as_i32x8()))
721	}
722
723	/// Compares packed 16-bit integers in `a` and `b` for greater-than.
724	///
725	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16)
726	#[inline]
727	#[target_feature(enable = "avx2")]
728	#[cfg_attr(test, assert_instr(vpcmpgtw))]
729	#[stable(feature = "simd_x86", since = "1.27.0")]
730	pub unsafe fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
731	transmute::<i16x16, _>(src:simd_gt(x:a.as_i16x16(), y:b.as_i16x16()))
732	}
733
734	/// Compares packed 8-bit integers in `a` and `b` for greater-than.
735	///
736	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8)
737	#[inline]
738	#[target_feature(enable = "avx2")]
739	#[cfg_attr(test, assert_instr(vpcmpgtb))]
740	#[stable(feature = "simd_x86", since = "1.27.0")]
741	pub unsafe fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
742	transmute::<i8x32, _>(src:simd_gt(x:a.as_i8x32(), y:b.as_i8x32()))
743	}
744
745	/// Sign-extend 16-bit integers to 32-bit integers.
746	///
747	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi32)
748	#[inline]
749	#[target_feature(enable = "avx2")]
750	#[cfg_attr(test, assert_instr(vpmovsxwd))]
751	#[stable(feature = "simd_x86", since = "1.27.0")]
752	pub unsafe fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
753	transmute::<i32x8, _>(src:simd_cast(a.as_i16x8()))
754	}
755
756	/// Sign-extend 16-bit integers to 64-bit integers.
757	///
758	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi64)
759	#[inline]
760	#[target_feature(enable = "avx2")]
761	#[cfg_attr(test, assert_instr(vpmovsxwq))]
762	#[stable(feature = "simd_x86", since = "1.27.0")]
763	pub unsafe fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
764	let a: i16x8 = a.as_i16x8();
765	let v64: i16x4 = simd_shuffle!(a, a, [`0`, `1`, `2`, `3`]);
766	transmute::<i64x4, _>(src:simd_cast(v64))
767	}
768
769	/// Sign-extend 32-bit integers to 64-bit integers.
770	///
771	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi64)
772	#[inline]
773	#[target_feature(enable = "avx2")]
774	#[cfg_attr(test, assert_instr(vpmovsxdq))]
775	#[stable(feature = "simd_x86", since = "1.27.0")]
776	pub unsafe fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
777	transmute::<i64x4, _>(src:simd_cast(a.as_i32x4()))
778	}
779
780	/// Sign-extend 8-bit integers to 16-bit integers.
781	///
782	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi16)
783	#[inline]
784	#[target_feature(enable = "avx2")]
785	#[cfg_attr(test, assert_instr(vpmovsxbw))]
786	#[stable(feature = "simd_x86", since = "1.27.0")]
787	pub unsafe fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
788	transmute::<i16x16, _>(src:simd_cast(a.as_i8x16()))
789	}
790
791	/// Sign-extend 8-bit integers to 32-bit integers.
792	///
793	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi32)
794	#[inline]
795	#[target_feature(enable = "avx2")]
796	#[cfg_attr(test, assert_instr(vpmovsxbd))]
797	#[stable(feature = "simd_x86", since = "1.27.0")]
798	pub unsafe fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
799	let a: i8x16 = a.as_i8x16();
800	let v64: i8x8 = simd_shuffle!(a, a, [`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`]);
801	transmute::<i32x8, _>(src:simd_cast(v64))
802	}
803
804	/// Sign-extend 8-bit integers to 64-bit integers.
805	///
806	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi64)
807	#[inline]
808	#[target_feature(enable = "avx2")]
809	#[cfg_attr(test, assert_instr(vpmovsxbq))]
810	#[stable(feature = "simd_x86", since = "1.27.0")]
811	pub unsafe fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
812	let a: i8x16 = a.as_i8x16();
813	let v32: i8x4 = simd_shuffle!(a, a, [`0`, `1`, `2`, `3`]);
814	transmute::<i64x4, _>(src:simd_cast(v32))
815	}
816
817	/// Zeroes extend packed unsigned 16-bit integers in `a` to packed 32-bit
818	/// integers, and stores the results in `dst`.
819	///
820	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu16_epi32)
821	#[inline]
822	#[target_feature(enable = "avx2")]
823	#[cfg_attr(test, assert_instr(vpmovzxwd))]
824	#[stable(feature = "simd_x86", since = "1.27.0")]
825	pub unsafe fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
826	transmute::<i32x8, _>(src:simd_cast(a.as_u16x8()))
827	}
828
829	/// Zero-extend the lower four unsigned 16-bit integers in `a` to 64-bit
830	/// integers. The upper four elements of `a` are unused.
831	///
832	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu16_epi64)
833	#[inline]
834	#[target_feature(enable = "avx2")]
835	#[cfg_attr(test, assert_instr(vpmovzxwq))]
836	#[stable(feature = "simd_x86", since = "1.27.0")]
837	pub unsafe fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
838	let a: u16x8 = a.as_u16x8();
839	let v64: u16x4 = simd_shuffle!(a, a, [`0`, `1`, `2`, `3`]);
840	transmute::<i64x4, _>(src:simd_cast(v64))
841	}
842
843	/// Zero-extend unsigned 32-bit integers in `a` to 64-bit integers.
844	///
845	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_epi64)
846	#[inline]
847	#[target_feature(enable = "avx2")]
848	#[cfg_attr(test, assert_instr(vpmovzxdq))]
849	#[stable(feature = "simd_x86", since = "1.27.0")]
850	pub unsafe fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
851	transmute::<i64x4, _>(src:simd_cast(a.as_u32x4()))
852	}
853
854	/// Zero-extend unsigned 8-bit integers in `a` to 16-bit integers.
855	///
856	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi16)
857	#[inline]
858	#[target_feature(enable = "avx2")]
859	#[cfg_attr(test, assert_instr(vpmovzxbw))]
860	#[stable(feature = "simd_x86", since = "1.27.0")]
861	pub unsafe fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
862	transmute::<i16x16, _>(src:simd_cast(a.as_u8x16()))
863	}
864
865	/// Zero-extend the lower eight unsigned 8-bit integers in `a` to 32-bit
866	/// integers. The upper eight elements of `a` are unused.
867	///
868	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi32)
869	#[inline]
870	#[target_feature(enable = "avx2")]
871	#[cfg_attr(test, assert_instr(vpmovzxbd))]
872	#[stable(feature = "simd_x86", since = "1.27.0")]
873	pub unsafe fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
874	let a: u8x16 = a.as_u8x16();
875	let v64: u8x8 = simd_shuffle!(a, a, [`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`]);
876	transmute::<i32x8, _>(src:simd_cast(v64))
877	}
878
879	/// Zero-extend the lower four unsigned 8-bit integers in `a` to 64-bit
880	/// integers. The upper twelve elements of `a` are unused.
881	///
882	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi64)
883	#[inline]
884	#[target_feature(enable = "avx2")]
885	#[cfg_attr(test, assert_instr(vpmovzxbq))]
886	#[stable(feature = "simd_x86", since = "1.27.0")]
887	pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
888	let a: u8x16 = a.as_u8x16();
889	let v32: u8x4 = simd_shuffle!(a, a, [`0`, `1`, `2`, `3`]);
890	transmute::<i64x4, _>(src:simd_cast(v32))
891	}
892
893	/// Extracts 128 bits (of integer data) from `a` selected with `IMM1`.
894	///
895	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti128_si256)
896	#[inline]
897	#[target_feature(enable = "avx2")]
898	#[cfg_attr(
899	all(test, not(target_os = "windows")),
900	assert_instr(vextractf128, IMM1 = `1`)
901	)]
902	#[rustc_legacy_const_generics(`1`)]
903	#[stable(feature = "simd_x86", since = "1.27.0")]
904	pub unsafe fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
905	static_assert_uimm_bits!(IMM1, `1`);
906	let a: i64x4 = a.as_i64x4();
907	let b: i64x4 = _mm256_undefined_si256().as_i64x4();
908	let dst: i64x2 = simd_shuffle!(a, b, [[`0`, `1`], [`2`, `3`]][IMM1 as usize]);
909	transmute(src:dst)
910	}
911
912	/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`.
913	///
914	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_epi16)
915	#[inline]
916	#[target_feature(enable = "avx2")]
917	#[cfg_attr(test, assert_instr(vphaddw))]
918	#[stable(feature = "simd_x86", since = "1.27.0")]
919	pub unsafe fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
920	transmute(src:phaddw(a:a.as_i16x16(), b:b.as_i16x16()))
921	}
922
923	/// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`.
924	///
925	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_epi32)
926	#[inline]
927	#[target_feature(enable = "avx2")]
928	#[cfg_attr(test, assert_instr(vphaddd))]
929	#[stable(feature = "simd_x86", since = "1.27.0")]
930	pub unsafe fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
931	transmute(src:phaddd(a:a.as_i32x8(), b:b.as_i32x8()))
932	}
933
934	/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`
935	/// using saturation.
936	///
937	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadds_epi16)
938	#[inline]
939	#[target_feature(enable = "avx2")]
940	#[cfg_attr(test, assert_instr(vphaddsw))]
941	#[stable(feature = "simd_x86", since = "1.27.0")]
942	pub unsafe fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
943	transmute(src:phaddsw(a:a.as_i16x16(), b:b.as_i16x16()))
944	}
945
946	/// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`.
947	///
948	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_epi16)
949	#[inline]
950	#[target_feature(enable = "avx2")]
951	#[cfg_attr(test, assert_instr(vphsubw))]
952	#[stable(feature = "simd_x86", since = "1.27.0")]
953	pub unsafe fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
954	transmute(src:phsubw(a:a.as_i16x16(), b:b.as_i16x16()))
955	}
956
957	/// Horizontally subtract adjacent pairs of 32-bit integers in `a` and `b`.
958	///
959	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_epi32)
960	#[inline]
961	#[target_feature(enable = "avx2")]
962	#[cfg_attr(test, assert_instr(vphsubd))]
963	#[stable(feature = "simd_x86", since = "1.27.0")]
964	pub unsafe fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
965	transmute(src:phsubd(a:a.as_i32x8(), b:b.as_i32x8()))
966	}
967
968	/// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`
969	/// using saturation.
970	///
971	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsubs_epi16)
972	#[inline]
973	#[target_feature(enable = "avx2")]
974	#[cfg_attr(test, assert_instr(vphsubsw))]
975	#[stable(feature = "simd_x86", since = "1.27.0")]
976	pub unsafe fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
977	transmute(src:phsubsw(a:a.as_i16x16(), b:b.as_i16x16()))
978	}
979
980	/// Returns values from `slice` at offsets determined by `offsets scale`,*
981	/// where
982	/// `scale` should be 1, 2, 4 or 8.
983	///
984	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_epi32)
985	#[inline]
986	#[target_feature(enable = "avx2")]
987	#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = `1`))]
988	#[rustc_legacy_const_generics(`2`)]
989	#[stable(feature = "simd_x86", since = "1.27.0")]
990	pub unsafe fn _mm_i32gather_epi32<const SCALE: i32>(
991	slice: *const i32,
992	offsets: __m128i,
993	) -> __m128i {
994	static_assert_imm8_scale!(SCALE);
995	let zero: i32x4 = _mm_setzero_si128().as_i32x4();
996	let neg_one: i32x4 = _mm_set1_epi32(`-1`).as_i32x4();
997	let offsets: i32x4 = offsets.as_i32x4();
998	let slice: const i8 = slice as const i8;
999	let r: i32x4 = pgatherdd(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1000	transmute(src:r)
1001	}
1002
1003	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1004	/// where
1005	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1006	/// that position instead.
1007	///
1008	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_epi32)
1009	#[inline]
1010	#[target_feature(enable = "avx2")]
1011	#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = `1`))]
1012	#[rustc_legacy_const_generics(`4`)]
1013	#[stable(feature = "simd_x86", since = "1.27.0")]
1014	pub unsafe fn _mm_mask_i32gather_epi32<const SCALE: i32>(
1015	src: __m128i,
1016	slice: *const i32,
1017	offsets: __m128i,
1018	mask: __m128i,
1019	) -> __m128i {
1020	static_assert_imm8_scale!(SCALE);
1021	let src: i32x4 = src.as_i32x4();
1022	let mask: i32x4 = mask.as_i32x4();
1023	let offsets: i32x4 = offsets.as_i32x4();
1024	let slice: const i8 = slice as const i8;
1025	let r: i32x4 = pgatherdd(src, slice, offsets, mask, SCALE as i8);
1026	transmute(src:r)
1027	}
1028
1029	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1030	/// where
1031	/// `scale` should be 1, 2, 4 or 8.
1032	///
1033	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_epi32)
1034	#[inline]
1035	#[target_feature(enable = "avx2")]
1036	#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = `1`))]
1037	#[rustc_legacy_const_generics(`2`)]
1038	#[stable(feature = "simd_x86", since = "1.27.0")]
1039	pub unsafe fn _mm256_i32gather_epi32<const SCALE: i32>(
1040	slice: *const i32,
1041	offsets: __m256i,
1042	) -> __m256i {
1043	static_assert_imm8_scale!(SCALE);
1044	let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
1045	let neg_one: i32x8 = _mm256_set1_epi32(`-1`).as_i32x8();
1046	let offsets: i32x8 = offsets.as_i32x8();
1047	let slice: const i8 = slice as const i8;
1048	let r: i32x8 = vpgatherdd(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1049	transmute(src:r)
1050	}
1051
1052	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1053	/// where
1054	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1055	/// that position instead.
1056	///
1057	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_epi32)
1058	#[inline]
1059	#[target_feature(enable = "avx2")]
1060	#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = `1`))]
1061	#[rustc_legacy_const_generics(`4`)]
1062	#[stable(feature = "simd_x86", since = "1.27.0")]
1063	pub unsafe fn _mm256_mask_i32gather_epi32<const SCALE: i32>(
1064	src: __m256i,
1065	slice: *const i32,
1066	offsets: __m256i,
1067	mask: __m256i,
1068	) -> __m256i {
1069	static_assert_imm8_scale!(SCALE);
1070	let src: i32x8 = src.as_i32x8();
1071	let mask: i32x8 = mask.as_i32x8();
1072	let offsets: i32x8 = offsets.as_i32x8();
1073	let slice: const i8 = slice as const i8;
1074	let r: i32x8 = vpgatherdd(src, slice, offsets, mask, SCALE as i8);
1075	transmute(src:r)
1076	}
1077
1078	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1079	/// where
1080	/// `scale` should be 1, 2, 4 or 8.
1081	///
1082	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_ps)
1083	#[inline]
1084	#[target_feature(enable = "avx2")]
1085	#[cfg_attr(test, assert_instr(vgatherdps, SCALE = `1`))]
1086	#[rustc_legacy_const_generics(`2`)]
1087	#[stable(feature = "simd_x86", since = "1.27.0")]
1088	pub unsafe fn _mm_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
1089	static_assert_imm8_scale!(SCALE);
1090	let zero: __m128 = _mm_setzero_ps();
1091	let neg_one: __m128 = _mm_set1_ps(`-1.0`);
1092	let offsets: i32x4 = offsets.as_i32x4();
1093	let slice: const i8 = slice as const i8;
1094	pgatherdps(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1095	}
1096
1097	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1098	/// where
1099	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1100	/// that position instead.
1101	///
1102	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_ps)
1103	#[inline]
1104	#[target_feature(enable = "avx2")]
1105	#[cfg_attr(test, assert_instr(vgatherdps, SCALE = `1`))]
1106	#[rustc_legacy_const_generics(`4`)]
1107	#[stable(feature = "simd_x86", since = "1.27.0")]
1108	pub unsafe fn _mm_mask_i32gather_ps<const SCALE: i32>(
1109	src: __m128,
1110	slice: *const f32,
1111	offsets: __m128i,
1112	mask: __m128,
1113	) -> __m128 {
1114	static_assert_imm8_scale!(SCALE);
1115	let offsets: i32x4 = offsets.as_i32x4();
1116	let slice: const i8 = slice as const i8;
1117	pgatherdps(src, slice, offsets, mask, SCALE as i8)
1118	}
1119
1120	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1121	/// where
1122	/// `scale` should be 1, 2, 4 or 8.
1123	///
1124	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_ps)
1125	#[inline]
1126	#[target_feature(enable = "avx2")]
1127	#[cfg_attr(test, assert_instr(vgatherdps, SCALE = `1`))]
1128	#[rustc_legacy_const_generics(`2`)]
1129	#[stable(feature = "simd_x86", since = "1.27.0")]
1130	pub unsafe fn _mm256_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m256 {
1131	static_assert_imm8_scale!(SCALE);
1132	let zero: __m256 = _mm256_setzero_ps();
1133	let neg_one: __m256 = _mm256_set1_ps(`-1.0`);
1134	let offsets: i32x8 = offsets.as_i32x8();
1135	let slice: const i8 = slice as const i8;
1136	vpgatherdps(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1137	}
1138
1139	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1140	/// where
1141	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1142	/// that position instead.
1143	///
1144	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_ps)
1145	#[inline]
1146	#[target_feature(enable = "avx2")]
1147	#[cfg_attr(test, assert_instr(vgatherdps, SCALE = `1`))]
1148	#[rustc_legacy_const_generics(`4`)]
1149	#[stable(feature = "simd_x86", since = "1.27.0")]
1150	pub unsafe fn _mm256_mask_i32gather_ps<const SCALE: i32>(
1151	src: __m256,
1152	slice: *const f32,
1153	offsets: __m256i,
1154	mask: __m256,
1155	) -> __m256 {
1156	static_assert_imm8_scale!(SCALE);
1157	let offsets: i32x8 = offsets.as_i32x8();
1158	let slice: const i8 = slice as const i8;
1159	vpgatherdps(src, slice, offsets, mask, SCALE as i8)
1160	}
1161
1162	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1163	/// where
1164	/// `scale` should be 1, 2, 4 or 8.
1165	///
1166	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_epi64)
1167	#[inline]
1168	#[target_feature(enable = "avx2")]
1169	#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = `1`))]
1170	#[rustc_legacy_const_generics(`2`)]
1171	#[stable(feature = "simd_x86", since = "1.27.0")]
1172	pub unsafe fn _mm_i32gather_epi64<const SCALE: i32>(
1173	slice: *const i64,
1174	offsets: __m128i,
1175	) -> __m128i {
1176	static_assert_imm8_scale!(SCALE);
1177	let zero: i64x2 = _mm_setzero_si128().as_i64x2();
1178	let neg_one: i64x2 = _mm_set1_epi64x(`-1`).as_i64x2();
1179	let offsets: i32x4 = offsets.as_i32x4();
1180	let slice: const i8 = slice as const i8;
1181	let r: i64x2 = pgatherdq(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1182	transmute(src:r)
1183	}
1184
1185	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1186	/// where
1187	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1188	/// that position instead.
1189	///
1190	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_epi64)
1191	#[inline]
1192	#[target_feature(enable = "avx2")]
1193	#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = `1`))]
1194	#[rustc_legacy_const_generics(`4`)]
1195	#[stable(feature = "simd_x86", since = "1.27.0")]
1196	pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>(
1197	src: __m128i,
1198	slice: *const i64,
1199	offsets: __m128i,
1200	mask: __m128i,
1201	) -> __m128i {
1202	static_assert_imm8_scale!(SCALE);
1203	let src: i64x2 = src.as_i64x2();
1204	let mask: i64x2 = mask.as_i64x2();
1205	let offsets: i32x4 = offsets.as_i32x4();
1206	let slice: const i8 = slice as const i8;
1207	let r: i64x2 = pgatherdq(src, slice, offsets, mask, SCALE as i8);
1208	transmute(src:r)
1209	}
1210
1211	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1212	/// where
1213	/// `scale` should be 1, 2, 4 or 8.
1214	///
1215	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_epi64)
1216	#[inline]
1217	#[target_feature(enable = "avx2")]
1218	#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = `1`))]
1219	#[rustc_legacy_const_generics(`2`)]
1220	#[stable(feature = "simd_x86", since = "1.27.0")]
1221	pub unsafe fn _mm256_i32gather_epi64<const SCALE: i32>(
1222	slice: *const i64,
1223	offsets: __m128i,
1224	) -> __m256i {
1225	static_assert_imm8_scale!(SCALE);
1226	let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
1227	let neg_one: i64x4 = _mm256_set1_epi64x(`-1`).as_i64x4();
1228	let offsets: i32x4 = offsets.as_i32x4();
1229	let slice: const i8 = slice as const i8;
1230	let r: i64x4 = vpgatherdq(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1231	transmute(src:r)
1232	}
1233
1234	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1235	/// where
1236	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1237	/// that position instead.
1238	///
1239	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_epi64)
1240	#[inline]
1241	#[target_feature(enable = "avx2")]
1242	#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = `1`))]
1243	#[rustc_legacy_const_generics(`4`)]
1244	#[stable(feature = "simd_x86", since = "1.27.0")]
1245	pub unsafe fn _mm256_mask_i32gather_epi64<const SCALE: i32>(
1246	src: __m256i,
1247	slice: *const i64,
1248	offsets: __m128i,
1249	mask: __m256i,
1250	) -> __m256i {
1251	static_assert_imm8_scale!(SCALE);
1252	let src: i64x4 = src.as_i64x4();
1253	let mask: i64x4 = mask.as_i64x4();
1254	let offsets: i32x4 = offsets.as_i32x4();
1255	let slice: const i8 = slice as const i8;
1256	let r: i64x4 = vpgatherdq(src, slice, offsets, mask, SCALE as i8);
1257	transmute(src:r)
1258	}
1259
1260	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1261	/// where
1262	/// `scale` should be 1, 2, 4 or 8.
1263	///
1264	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_pd)
1265	#[inline]
1266	#[target_feature(enable = "avx2")]
1267	#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = `1`))]
1268	#[rustc_legacy_const_generics(`2`)]
1269	#[stable(feature = "simd_x86", since = "1.27.0")]
1270	pub unsafe fn _mm_i32gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
1271	static_assert_imm8_scale!(SCALE);
1272	let zero: __m128d = _mm_setzero_pd();
1273	let neg_one: __m128d = _mm_set1_pd(`-1.0`);
1274	let offsets: i32x4 = offsets.as_i32x4();
1275	let slice: const i8 = slice as const i8;
1276	pgatherdpd(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1277	}
1278
1279	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1280	/// where
1281	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1282	/// that position instead.
1283	///
1284	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_pd)
1285	#[inline]
1286	#[target_feature(enable = "avx2")]
1287	#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = `1`))]
1288	#[rustc_legacy_const_generics(`4`)]
1289	#[stable(feature = "simd_x86", since = "1.27.0")]
1290	pub unsafe fn _mm_mask_i32gather_pd<const SCALE: i32>(
1291	src: __m128d,
1292	slice: *const f64,
1293	offsets: __m128i,
1294	mask: __m128d,
1295	) -> __m128d {
1296	static_assert_imm8_scale!(SCALE);
1297	let offsets: i32x4 = offsets.as_i32x4();
1298	let slice: const i8 = slice as const i8;
1299	pgatherdpd(src, slice, offsets, mask, SCALE as i8)
1300	}
1301
1302	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1303	/// where
1304	/// `scale` should be 1, 2, 4 or 8.
1305	///
1306	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_pd)
1307	#[inline]
1308	#[target_feature(enable = "avx2")]
1309	#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = `1`))]
1310	#[rustc_legacy_const_generics(`2`)]
1311	#[stable(feature = "simd_x86", since = "1.27.0")]
1312	pub unsafe fn _mm256_i32gather_pd<const SCALE: i32>(
1313	slice: *const f64,
1314	offsets: __m128i,
1315	) -> __m256d {
1316	static_assert_imm8_scale!(SCALE);
1317	let zero: __m256d = _mm256_setzero_pd();
1318	let neg_one: __m256d = _mm256_set1_pd(`-1.0`);
1319	let offsets: i32x4 = offsets.as_i32x4();
1320	let slice: const i8 = slice as const i8;
1321	vpgatherdpd(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1322	}
1323
1324	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1325	/// where
1326	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1327	/// that position instead.
1328	///
1329	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_pd)
1330	#[inline]
1331	#[target_feature(enable = "avx2")]
1332	#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = `1`))]
1333	#[rustc_legacy_const_generics(`4`)]
1334	#[stable(feature = "simd_x86", since = "1.27.0")]
1335	pub unsafe fn _mm256_mask_i32gather_pd<const SCALE: i32>(
1336	src: __m256d,
1337	slice: *const f64,
1338	offsets: __m128i,
1339	mask: __m256d,
1340	) -> __m256d {
1341	static_assert_imm8_scale!(SCALE);
1342	let offsets: i32x4 = offsets.as_i32x4();
1343	let slice: const i8 = slice as const i8;
1344	vpgatherdpd(src, slice, offsets, mask, SCALE as i8)
1345	}
1346
1347	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1348	/// where
1349	/// `scale` should be 1, 2, 4 or 8.
1350	///
1351	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_epi32)
1352	#[inline]
1353	#[target_feature(enable = "avx2")]
1354	#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = `1`))]
1355	#[rustc_legacy_const_generics(`2`)]
1356	#[stable(feature = "simd_x86", since = "1.27.0")]
1357	pub unsafe fn _mm_i64gather_epi32<const SCALE: i32>(
1358	slice: *const i32,
1359	offsets: __m128i,
1360	) -> __m128i {
1361	static_assert_imm8_scale!(SCALE);
1362	let zero: i32x4 = _mm_setzero_si128().as_i32x4();
1363	let neg_one: i32x4 = _mm_set1_epi64x(`-1`).as_i32x4();
1364	let offsets: i64x2 = offsets.as_i64x2();
1365	let slice: const i8 = slice as const i8;
1366	let r: i32x4 = pgatherqd(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1367	transmute(src:r)
1368	}
1369
1370	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1371	/// where
1372	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1373	/// that position instead.
1374	///
1375	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_epi32)
1376	#[inline]
1377	#[target_feature(enable = "avx2")]
1378	#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = `1`))]
1379	#[rustc_legacy_const_generics(`4`)]
1380	#[stable(feature = "simd_x86", since = "1.27.0")]
1381	pub unsafe fn _mm_mask_i64gather_epi32<const SCALE: i32>(
1382	src: __m128i,
1383	slice: *const i32,
1384	offsets: __m128i,
1385	mask: __m128i,
1386	) -> __m128i {
1387	static_assert_imm8_scale!(SCALE);
1388	let src: i32x4 = src.as_i32x4();
1389	let mask: i32x4 = mask.as_i32x4();
1390	let offsets: i64x2 = offsets.as_i64x2();
1391	let slice: const i8 = slice as const i8;
1392	let r: i32x4 = pgatherqd(src, slice, offsets, mask, SCALE as i8);
1393	transmute(src:r)
1394	}
1395
1396	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1397	/// where
1398	/// `scale` should be 1, 2, 4 or 8.
1399	///
1400	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_epi32)
1401	#[inline]
1402	#[target_feature(enable = "avx2")]
1403	#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = `1`))]
1404	#[rustc_legacy_const_generics(`2`)]
1405	#[stable(feature = "simd_x86", since = "1.27.0")]
1406	pub unsafe fn _mm256_i64gather_epi32<const SCALE: i32>(
1407	slice: *const i32,
1408	offsets: __m256i,
1409	) -> __m128i {
1410	static_assert_imm8_scale!(SCALE);
1411	let zero: i32x4 = _mm_setzero_si128().as_i32x4();
1412	let neg_one: i32x4 = _mm_set1_epi64x(`-1`).as_i32x4();
1413	let offsets: i64x4 = offsets.as_i64x4();
1414	let slice: const i8 = slice as const i8;
1415	let r: i32x4 = vpgatherqd(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1416	transmute(src:r)
1417	}
1418
1419	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1420	/// where
1421	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1422	/// that position instead.
1423	///
1424	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_epi32)
1425	#[inline]
1426	#[target_feature(enable = "avx2")]
1427	#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = `1`))]
1428	#[rustc_legacy_const_generics(`4`)]
1429	#[stable(feature = "simd_x86", since = "1.27.0")]
1430	pub unsafe fn _mm256_mask_i64gather_epi32<const SCALE: i32>(
1431	src: __m128i,
1432	slice: *const i32,
1433	offsets: __m256i,
1434	mask: __m128i,
1435	) -> __m128i {
1436	static_assert_imm8_scale!(SCALE);
1437	let src: i32x4 = src.as_i32x4();
1438	let mask: i32x4 = mask.as_i32x4();
1439	let offsets: i64x4 = offsets.as_i64x4();
1440	let slice: const i8 = slice as const i8;
1441	let r: i32x4 = vpgatherqd(src, slice, offsets, mask, SCALE as i8);
1442	transmute(src:r)
1443	}
1444
1445	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1446	/// where
1447	/// `scale` should be 1, 2, 4 or 8.
1448	///
1449	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_ps)
1450	#[inline]
1451	#[target_feature(enable = "avx2")]
1452	#[cfg_attr(test, assert_instr(vgatherqps, SCALE = `1`))]
1453	#[rustc_legacy_const_generics(`2`)]
1454	#[stable(feature = "simd_x86", since = "1.27.0")]
1455	pub unsafe fn _mm_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
1456	static_assert_imm8_scale!(SCALE);
1457	let zero: __m128 = _mm_setzero_ps();
1458	let neg_one: __m128 = _mm_set1_ps(`-1.0`);
1459	let offsets: i64x2 = offsets.as_i64x2();
1460	let slice: const i8 = slice as const i8;
1461	pgatherqps(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1462	}
1463
1464	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1465	/// where
1466	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1467	/// that position instead.
1468	///
1469	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_ps)
1470	#[inline]
1471	#[target_feature(enable = "avx2")]
1472	#[cfg_attr(test, assert_instr(vgatherqps, SCALE = `1`))]
1473	#[rustc_legacy_const_generics(`4`)]
1474	#[stable(feature = "simd_x86", since = "1.27.0")]
1475	pub unsafe fn _mm_mask_i64gather_ps<const SCALE: i32>(
1476	src: __m128,
1477	slice: *const f32,
1478	offsets: __m128i,
1479	mask: __m128,
1480	) -> __m128 {
1481	static_assert_imm8_scale!(SCALE);
1482	let offsets: i64x2 = offsets.as_i64x2();
1483	let slice: const i8 = slice as const i8;
1484	pgatherqps(src, slice, offsets, mask, SCALE as i8)
1485	}
1486
1487	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1488	/// where
1489	/// `scale` should be 1, 2, 4 or 8.
1490	///
1491	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_ps)
1492	#[inline]
1493	#[target_feature(enable = "avx2")]
1494	#[cfg_attr(test, assert_instr(vgatherqps, SCALE = `1`))]
1495	#[rustc_legacy_const_generics(`2`)]
1496	#[stable(feature = "simd_x86", since = "1.27.0")]
1497	pub unsafe fn _mm256_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m128 {
1498	static_assert_imm8_scale!(SCALE);
1499	let zero: __m128 = _mm_setzero_ps();
1500	let neg_one: __m128 = _mm_set1_ps(`-1.0`);
1501	let offsets: i64x4 = offsets.as_i64x4();
1502	let slice: const i8 = slice as const i8;
1503	vpgatherqps(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1504	}
1505
1506	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1507	/// where
1508	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1509	/// that position instead.
1510	///
1511	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_ps)
1512	#[inline]
1513	#[target_feature(enable = "avx2")]
1514	#[cfg_attr(test, assert_instr(vgatherqps, SCALE = `1`))]
1515	#[rustc_legacy_const_generics(`4`)]
1516	#[stable(feature = "simd_x86", since = "1.27.0")]
1517	pub unsafe fn _mm256_mask_i64gather_ps<const SCALE: i32>(
1518	src: __m128,
1519	slice: *const f32,
1520	offsets: __m256i,
1521	mask: __m128,
1522	) -> __m128 {
1523	static_assert_imm8_scale!(SCALE);
1524	let offsets: i64x4 = offsets.as_i64x4();
1525	let slice: const i8 = slice as const i8;
1526	vpgatherqps(src, slice, offsets, mask, SCALE as i8)
1527	}
1528
1529	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1530	/// where
1531	/// `scale` should be 1, 2, 4 or 8.
1532	///
1533	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_epi64)
1534	#[inline]
1535	#[target_feature(enable = "avx2")]
1536	#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = `1`))]
1537	#[rustc_legacy_const_generics(`2`)]
1538	#[stable(feature = "simd_x86", since = "1.27.0")]
1539	pub unsafe fn _mm_i64gather_epi64<const SCALE: i32>(
1540	slice: *const i64,
1541	offsets: __m128i,
1542	) -> __m128i {
1543	static_assert_imm8_scale!(SCALE);
1544	let zero: i64x2 = _mm_setzero_si128().as_i64x2();
1545	let neg_one: i64x2 = _mm_set1_epi64x(`-1`).as_i64x2();
1546	let slice: const i8 = slice as const i8;
1547	let offsets: i64x2 = offsets.as_i64x2();
1548	let r: i64x2 = pgatherqq(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1549	transmute(src:r)
1550	}
1551
1552	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1553	/// where
1554	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1555	/// that position instead.
1556	///
1557	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_epi64)
1558	#[inline]
1559	#[target_feature(enable = "avx2")]
1560	#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = `1`))]
1561	#[rustc_legacy_const_generics(`4`)]
1562	#[stable(feature = "simd_x86", since = "1.27.0")]
1563	pub unsafe fn _mm_mask_i64gather_epi64<const SCALE: i32>(
1564	src: __m128i,
1565	slice: *const i64,
1566	offsets: __m128i,
1567	mask: __m128i,
1568	) -> __m128i {
1569	static_assert_imm8_scale!(SCALE);
1570	let src: i64x2 = src.as_i64x2();
1571	let mask: i64x2 = mask.as_i64x2();
1572	let offsets: i64x2 = offsets.as_i64x2();
1573	let slice: const i8 = slice as const i8;
1574	let r: i64x2 = pgatherqq(src, slice, offsets, mask, SCALE as i8);
1575	transmute(src:r)
1576	}
1577
1578	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1579	/// where
1580	/// `scale` should be 1, 2, 4 or 8.
1581	///
1582	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_epi64)
1583	#[inline]
1584	#[target_feature(enable = "avx2")]
1585	#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = `1`))]
1586	#[rustc_legacy_const_generics(`2`)]
1587	#[stable(feature = "simd_x86", since = "1.27.0")]
1588	pub unsafe fn _mm256_i64gather_epi64<const SCALE: i32>(
1589	slice: *const i64,
1590	offsets: __m256i,
1591	) -> __m256i {
1592	static_assert_imm8_scale!(SCALE);
1593	let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
1594	let neg_one: i64x4 = _mm256_set1_epi64x(`-1`).as_i64x4();
1595	let slice: const i8 = slice as const i8;
1596	let offsets: i64x4 = offsets.as_i64x4();
1597	let r: i64x4 = vpgatherqq(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1598	transmute(src:r)
1599	}
1600
1601	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1602	/// where
1603	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1604	/// that position instead.
1605	///
1606	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_epi64)
1607	#[inline]
1608	#[target_feature(enable = "avx2")]
1609	#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = `1`))]
1610	#[rustc_legacy_const_generics(`4`)]
1611	#[stable(feature = "simd_x86", since = "1.27.0")]
1612	pub unsafe fn _mm256_mask_i64gather_epi64<const SCALE: i32>(
1613	src: __m256i,
1614	slice: *const i64,
1615	offsets: __m256i,
1616	mask: __m256i,
1617	) -> __m256i {
1618	static_assert_imm8_scale!(SCALE);
1619	let src: i64x4 = src.as_i64x4();
1620	let mask: i64x4 = mask.as_i64x4();
1621	let offsets: i64x4 = offsets.as_i64x4();
1622	let slice: const i8 = slice as const i8;
1623	let r: i64x4 = vpgatherqq(src, slice, offsets, mask, SCALE as i8);
1624	transmute(src:r)
1625	}
1626
1627	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1628	/// where
1629	/// `scale` should be 1, 2, 4 or 8.
1630	///
1631	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_pd)
1632	#[inline]
1633	#[target_feature(enable = "avx2")]
1634	#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = `1`))]
1635	#[rustc_legacy_const_generics(`2`)]
1636	#[stable(feature = "simd_x86", since = "1.27.0")]
1637	pub unsafe fn _mm_i64gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
1638	static_assert_imm8_scale!(SCALE);
1639	let zero: __m128d = _mm_setzero_pd();
1640	let neg_one: __m128d = _mm_set1_pd(`-1.0`);
1641	let slice: const i8 = slice as const i8;
1642	let offsets: i64x2 = offsets.as_i64x2();
1643	pgatherqpd(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1644	}
1645
1646	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1647	/// where
1648	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1649	/// that position instead.
1650	///
1651	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_pd)
1652	#[inline]
1653	#[target_feature(enable = "avx2")]
1654	#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = `1`))]
1655	#[rustc_legacy_const_generics(`4`)]
1656	#[stable(feature = "simd_x86", since = "1.27.0")]
1657	pub unsafe fn _mm_mask_i64gather_pd<const SCALE: i32>(
1658	src: __m128d,
1659	slice: *const f64,
1660	offsets: __m128i,
1661	mask: __m128d,
1662	) -> __m128d {
1663	static_assert_imm8_scale!(SCALE);
1664	let slice: const i8 = slice as const i8;
1665	let offsets: i64x2 = offsets.as_i64x2();
1666	pgatherqpd(src, slice, offsets, mask, SCALE as i8)
1667	}
1668
1669	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1670	/// where
1671	/// `scale` should be 1, 2, 4 or 8.
1672	///
1673	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_pd)
1674	#[inline]
1675	#[target_feature(enable = "avx2")]
1676	#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = `1`))]
1677	#[rustc_legacy_const_generics(`2`)]
1678	#[stable(feature = "simd_x86", since = "1.27.0")]
1679	pub unsafe fn _mm256_i64gather_pd<const SCALE: i32>(
1680	slice: *const f64,
1681	offsets: __m256i,
1682	) -> __m256d {
1683	static_assert_imm8_scale!(SCALE);
1684	let zero: __m256d = _mm256_setzero_pd();
1685	let neg_one: __m256d = _mm256_set1_pd(`-1.0`);
1686	let slice: const i8 = slice as const i8;
1687	let offsets: i64x4 = offsets.as_i64x4();
1688	vpgatherqpd(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1689	}
1690
1691	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1692	/// where
1693	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1694	/// that position instead.
1695	///
1696	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_pd)
1697	#[inline]
1698	#[target_feature(enable = "avx2")]
1699	#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = `1`))]
1700	#[rustc_legacy_const_generics(`4`)]
1701	#[stable(feature = "simd_x86", since = "1.27.0")]
1702	pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>(
1703	src: __m256d,
1704	slice: *const f64,
1705	offsets: __m256i,
1706	mask: __m256d,
1707	) -> __m256d {
1708	static_assert_imm8_scale!(SCALE);
1709	let slice: const i8 = slice as const i8;
1710	let offsets: i64x4 = offsets.as_i64x4();
1711	vpgatherqpd(src, slice, offsets, mask, SCALE as i8)
1712	}
1713
1714	/// Copies `a` to `dst`, then insert 128 bits (of integer data) from `b` at the
1715	/// location specified by `IMM1`.
1716	///
1717	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti128_si256)
1718	#[inline]
1719	#[target_feature(enable = "avx2")]
1720	#[cfg_attr(
1721	all(test, not(target_os = "windows")),
1722	assert_instr(vinsertf128, IMM1 = `1`)
1723	)]
1724	#[rustc_legacy_const_generics(`2`)]
1725	#[stable(feature = "simd_x86", since = "1.27.0")]
1726	pub unsafe fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
1727	static_assert_uimm_bits!(IMM1, `1`);
1728	let a: i64x4 = a.as_i64x4();
1729	let b: i64x4 = _mm256_castsi128_si256(b).as_i64x4();
1730	let dst: i64x4 = simd_shuffle!(a, b, [[`4`, `5`, `2`, `3`], [`0`, `1`, `4`, `5`]][IMM1 as usize]);
1731	transmute(src:dst)
1732	}
1733
1734	/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
1735	/// intermediate signed 32-bit integers. Horizontally add adjacent pairs
1736	/// of intermediate 32-bit integers.
1737	///
1738	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd_epi16)
1739	#[inline]
1740	#[target_feature(enable = "avx2")]
1741	#[cfg_attr(test, assert_instr(vpmaddwd))]
1742	#[stable(feature = "simd_x86", since = "1.27.0")]
1743	pub unsafe fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
1744	transmute(src:pmaddwd(a:a.as_i16x16(), b:b.as_i16x16()))
1745	}
1746
1747	/// Vertically multiplies each unsigned 8-bit integer from `a` with the
1748	/// corresponding signed 8-bit integer from `b`, producing intermediate
1749	/// signed 16-bit integers. Horizontally add adjacent pairs of intermediate
1750	/// signed 16-bit integers
1751	///
1752	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maddubs_epi16)
1753	#[inline]
1754	#[target_feature(enable = "avx2")]
1755	#[cfg_attr(test, assert_instr(vpmaddubsw))]
1756	#[stable(feature = "simd_x86", since = "1.27.0")]
1757	pub unsafe fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
1758	transmute(src:pmaddubsw(a:a.as_u8x32(), b:b.as_u8x32()))
1759	}
1760
1761	/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
1762	/// (elements are zeroed out when the highest bit is not set in the
1763	/// corresponding element).
1764	///
1765	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_epi32)
1766	#[inline]
1767	#[target_feature(enable = "avx2")]
1768	#[cfg_attr(test, assert_instr(vpmaskmovd))]
1769	#[stable(feature = "simd_x86", since = "1.27.0")]
1770	pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
1771	transmute(src:maskloadd(mem_addr as *const i8, mask:mask.as_i32x4()))
1772	}
1773
1774	/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
1775	/// (elements are zeroed out when the highest bit is not set in the
1776	/// corresponding element).
1777	///
1778	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_epi32)
1779	#[inline]
1780	#[target_feature(enable = "avx2")]
1781	#[cfg_attr(test, assert_instr(vpmaskmovd))]
1782	#[stable(feature = "simd_x86", since = "1.27.0")]
1783	pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
1784	transmute(src:maskloadd256(mem_addr as *const i8, mask:mask.as_i32x8()))
1785	}
1786
1787	/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask`
1788	/// (elements are zeroed out when the highest bit is not set in the
1789	/// corresponding element).
1790	///
1791	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_epi64)
1792	#[inline]
1793	#[target_feature(enable = "avx2")]
1794	#[cfg_attr(test, assert_instr(vpmaskmovq))]
1795	#[stable(feature = "simd_x86", since = "1.27.0")]
1796	pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
1797	transmute(src:maskloadq(mem_addr as *const i8, mask:mask.as_i64x2()))
1798	}
1799
1800	/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask`
1801	/// (elements are zeroed out when the highest bit is not set in the
1802	/// corresponding element).
1803	///
1804	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_epi64)
1805	#[inline]
1806	#[target_feature(enable = "avx2")]
1807	#[cfg_attr(test, assert_instr(vpmaskmovq))]
1808	#[stable(feature = "simd_x86", since = "1.27.0")]
1809	pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
1810	transmute(src:maskloadq256(mem_addr as *const i8, mask:mask.as_i64x4()))
1811	}
1812
1813	/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr`
1814	/// using `mask` (elements are not stored when the highest bit is not set
1815	/// in the corresponding element).
1816	///
1817	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_epi32)
1818	#[inline]
1819	#[target_feature(enable = "avx2")]
1820	#[cfg_attr(test, assert_instr(vpmaskmovd))]
1821	#[stable(feature = "simd_x86", since = "1.27.0")]
1822	pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
1823	maskstored(mem_addr as *mut i8, mask:mask.as_i32x4(), a:a.as_i32x4())
1824	}
1825
1826	/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr`
1827	/// using `mask` (elements are not stored when the highest bit is not set
1828	/// in the corresponding element).
1829	///
1830	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_epi32)
1831	#[inline]
1832	#[target_feature(enable = "avx2")]
1833	#[cfg_attr(test, assert_instr(vpmaskmovd))]
1834	#[stable(feature = "simd_x86", since = "1.27.0")]
1835	pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
1836	maskstored256(mem_addr as *mut i8, mask:mask.as_i32x8(), a:a.as_i32x8())
1837	}
1838
1839	/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr`
1840	/// using `mask` (elements are not stored when the highest bit is not set
1841	/// in the corresponding element).
1842	///
1843	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_epi64)
1844	#[inline]
1845	#[target_feature(enable = "avx2")]
1846	#[cfg_attr(test, assert_instr(vpmaskmovq))]
1847	#[stable(feature = "simd_x86", since = "1.27.0")]
1848	pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
1849	maskstoreq(mem_addr as *mut i8, mask:mask.as_i64x2(), a:a.as_i64x2())
1850	}
1851
1852	/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr`
1853	/// using `mask` (elements are not stored when the highest bit is not set
1854	/// in the corresponding element).
1855	///
1856	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_epi64)
1857	#[inline]
1858	#[target_feature(enable = "avx2")]
1859	#[cfg_attr(test, assert_instr(vpmaskmovq))]
1860	#[stable(feature = "simd_x86", since = "1.27.0")]
1861	pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
1862	maskstoreq256(mem_addr as *mut i8, mask:mask.as_i64x4(), a:a.as_i64x4())
1863	}
1864
1865	/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
1866	/// maximum values.
1867	///
1868	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi16)
1869	#[inline]
1870	#[target_feature(enable = "avx2")]
1871	#[cfg_attr(test, assert_instr(vpmaxsw))]
1872	#[stable(feature = "simd_x86", since = "1.27.0")]
1873	pub unsafe fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
1874	let a: i16x16 = a.as_i16x16();
1875	let b: i16x16 = b.as_i16x16();
1876	transmute(src:simd_select::<i16x16, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
1877	}
1878
1879	/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
1880	/// maximum values.
1881	///
1882	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi32)
1883	#[inline]
1884	#[target_feature(enable = "avx2")]
1885	#[cfg_attr(test, assert_instr(vpmaxsd))]
1886	#[stable(feature = "simd_x86", since = "1.27.0")]
1887	pub unsafe fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
1888	let a: i32x8 = a.as_i32x8();
1889	let b: i32x8 = b.as_i32x8();
1890	transmute(src:simd_select::<i32x8, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
1891	}
1892
1893	/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
1894	/// maximum values.
1895	///
1896	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi8)
1897	#[inline]
1898	#[target_feature(enable = "avx2")]
1899	#[cfg_attr(test, assert_instr(vpmaxsb))]
1900	#[stable(feature = "simd_x86", since = "1.27.0")]
1901	pub unsafe fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
1902	let a: i8x32 = a.as_i8x32();
1903	let b: i8x32 = b.as_i8x32();
1904	transmute(src:simd_select::<i8x32, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
1905	}
1906
1907	/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
1908	/// the packed maximum values.
1909	///
1910	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu16)
1911	#[inline]
1912	#[target_feature(enable = "avx2")]
1913	#[cfg_attr(test, assert_instr(vpmaxuw))]
1914	#[stable(feature = "simd_x86", since = "1.27.0")]
1915	pub unsafe fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
1916	let a: u16x16 = a.as_u16x16();
1917	let b: u16x16 = b.as_u16x16();
1918	transmute(src:simd_select::<i16x16, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
1919	}
1920
1921	/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
1922	/// the packed maximum values.
1923	///
1924	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu32)
1925	#[inline]
1926	#[target_feature(enable = "avx2")]
1927	#[cfg_attr(test, assert_instr(vpmaxud))]
1928	#[stable(feature = "simd_x86", since = "1.27.0")]
1929	pub unsafe fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
1930	let a: u32x8 = a.as_u32x8();
1931	let b: u32x8 = b.as_u32x8();
1932	transmute(src:simd_select::<i32x8, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
1933	}
1934
1935	/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
1936	/// the packed maximum values.
1937	///
1938	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu8)
1939	#[inline]
1940	#[target_feature(enable = "avx2")]
1941	#[cfg_attr(test, assert_instr(vpmaxub))]
1942	#[stable(feature = "simd_x86", since = "1.27.0")]
1943	pub unsafe fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
1944	let a: u8x32 = a.as_u8x32();
1945	let b: u8x32 = b.as_u8x32();
1946	transmute(src:simd_select::<i8x32, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
1947	}
1948
1949	/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
1950	/// minimum values.
1951	///
1952	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi16)
1953	#[inline]
1954	#[target_feature(enable = "avx2")]
1955	#[cfg_attr(test, assert_instr(vpminsw))]
1956	#[stable(feature = "simd_x86", since = "1.27.0")]
1957	pub unsafe fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
1958	let a: i16x16 = a.as_i16x16();
1959	let b: i16x16 = b.as_i16x16();
1960	transmute(src:simd_select::<i16x16, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
1961	}
1962
1963	/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
1964	/// minimum values.
1965	///
1966	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi32)
1967	#[inline]
1968	#[target_feature(enable = "avx2")]
1969	#[cfg_attr(test, assert_instr(vpminsd))]
1970	#[stable(feature = "simd_x86", since = "1.27.0")]
1971	pub unsafe fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
1972	let a: i32x8 = a.as_i32x8();
1973	let b: i32x8 = b.as_i32x8();
1974	transmute(src:simd_select::<i32x8, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
1975	}
1976
1977	/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
1978	/// minimum values.
1979	///
1980	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi8)
1981	#[inline]
1982	#[target_feature(enable = "avx2")]
1983	#[cfg_attr(test, assert_instr(vpminsb))]
1984	#[stable(feature = "simd_x86", since = "1.27.0")]
1985	pub unsafe fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
1986	let a: i8x32 = a.as_i8x32();
1987	let b: i8x32 = b.as_i8x32();
1988	transmute(src:simd_select::<i8x32, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
1989	}
1990
1991	/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
1992	/// the packed minimum values.
1993	///
1994	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu16)
1995	#[inline]
1996	#[target_feature(enable = "avx2")]
1997	#[cfg_attr(test, assert_instr(vpminuw))]
1998	#[stable(feature = "simd_x86", since = "1.27.0")]
1999	pub unsafe fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
2000	let a: u16x16 = a.as_u16x16();
2001	let b: u16x16 = b.as_u16x16();
2002	transmute(src:simd_select::<i16x16, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2003	}
2004
2005	/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
2006	/// the packed minimum values.
2007	///
2008	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu32)
2009	#[inline]
2010	#[target_feature(enable = "avx2")]
2011	#[cfg_attr(test, assert_instr(vpminud))]
2012	#[stable(feature = "simd_x86", since = "1.27.0")]
2013	pub unsafe fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
2014	let a: u32x8 = a.as_u32x8();
2015	let b: u32x8 = b.as_u32x8();
2016	transmute(src:simd_select::<i32x8, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2017	}
2018
2019	/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
2020	/// the packed minimum values.
2021	///
2022	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu8)
2023	#[inline]
2024	#[target_feature(enable = "avx2")]
2025	#[cfg_attr(test, assert_instr(vpminub))]
2026	#[stable(feature = "simd_x86", since = "1.27.0")]
2027	pub unsafe fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
2028	let a: u8x32 = a.as_u8x32();
2029	let b: u8x32 = b.as_u8x32();
2030	transmute(src:simd_select::<i8x32, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2031	}
2032
2033	/// Creates mask from the most significant bit of each 8-bit element in `a`,
2034	/// return the result.
2035	///
2036	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_epi8)
2037	#[inline]
2038	#[target_feature(enable = "avx2")]
2039	#[cfg_attr(test, assert_instr(vpmovmskb))]
2040	#[stable(feature = "simd_x86", since = "1.27.0")]
2041	pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 {
2042	let z: i8x32 = i8x32::splat(`0`);
2043	let m: i8x32 = simd_lt(x:a.as_i8x32(), y:z);
2044	simd_bitmask::<_, u32>(m) as i32
2045	}
2046
2047	/// Computes the sum of absolute differences (SADs) of quadruplets of unsigned
2048	/// 8-bit integers in `a` compared to those in `b`, and stores the 16-bit
2049	/// results in dst. Eight SADs are performed for each 128-bit lane using one
2050	/// quadruplet from `b` and eight quadruplets from `a`. One quadruplet is
2051	/// selected from `b` starting at on the offset specified in `imm8`. Eight
2052	/// quadruplets are formed from sequential 8-bit integers selected from `a`
2053	/// starting at the offset specified in `imm8`.
2054	///
2055	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mpsadbw_epu8)
2056	#[inline]
2057	#[target_feature(enable = "avx2")]
2058	#[cfg_attr(test, assert_instr(vmpsadbw, IMM8 = `0`))]
2059	#[rustc_legacy_const_generics(`2`)]
2060	#[stable(feature = "simd_x86", since = "1.27.0")]
2061	pub unsafe fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2062	static_assert_uimm_bits!(IMM8, `8`);
2063	transmute(src:mpsadbw(a:a.as_u8x32(), b:b.as_u8x32(), IMM8))
2064	}
2065
2066	/// Multiplies the low 32-bit integers from each packed 64-bit element in
2067	/// `a` and `b`
2068	///
2069	/// Returns the 64-bit results.
2070	///
2071	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_epi32)
2072	#[inline]
2073	#[target_feature(enable = "avx2")]
2074	#[cfg_attr(test, assert_instr(vpmuldq))]
2075	#[stable(feature = "simd_x86", since = "1.27.0")]
2076	pub unsafe fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
2077	let a: i64x4 = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
2078	let b: i64x4 = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
2079	transmute(src:simd_mul(x:a, y:b))
2080	}
2081
2082	/// Multiplies the low unsigned 32-bit integers from each packed 64-bit
2083	/// element in `a` and `b`
2084	///
2085	/// Returns the unsigned 64-bit results.
2086	///
2087	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_epu32)
2088	#[inline]
2089	#[target_feature(enable = "avx2")]
2090	#[cfg_attr(test, assert_instr(vpmuludq))]
2091	#[stable(feature = "simd_x86", since = "1.27.0")]
2092	pub unsafe fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
2093	let a: u64x4 = a.as_u64x4();
2094	let b: u64x4 = b.as_u64x4();
2095	let mask: u64x4 = u64x4::splat(u32::MAX.into());
2096	transmute(src:simd_mul(x:simd_and(a, mask), y:simd_and(x:b, y:mask)))
2097	}
2098
2099	/// Multiplies the packed 16-bit integers in `a` and `b`, producing
2100	/// intermediate 32-bit integers and returning the high 16 bits of the
2101	/// intermediate integers.
2102	///
2103	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhi_epi16)
2104	#[inline]
2105	#[target_feature(enable = "avx2")]
2106	#[cfg_attr(test, assert_instr(vpmulhw))]
2107	#[stable(feature = "simd_x86", since = "1.27.0")]
2108	pub unsafe fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
2109	let a: i32x16 = simd_cast::<_, i32x16>(a.as_i16x16());
2110	let b: i32x16 = simd_cast::<_, i32x16>(b.as_i16x16());
2111	let r: i32x16 = simd_shr(lhs:simd_mul(a, b), rhs:i32x16::splat(`16`));
2112	transmute(src:simd_cast::<i32x16, i16x16>(r))
2113	}
2114
2115	/// Multiplies the packed unsigned 16-bit integers in `a` and `b`, producing
2116	/// intermediate 32-bit integers and returning the high 16 bits of the
2117	/// intermediate integers.
2118	///
2119	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhi_epu16)
2120	#[inline]
2121	#[target_feature(enable = "avx2")]
2122	#[cfg_attr(test, assert_instr(vpmulhuw))]
2123	#[stable(feature = "simd_x86", since = "1.27.0")]
2124	pub unsafe fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
2125	let a: u32x16 = simd_cast::<_, u32x16>(a.as_u16x16());
2126	let b: u32x16 = simd_cast::<_, u32x16>(b.as_u16x16());
2127	let r: u32x16 = simd_shr(lhs:simd_mul(a, b), rhs:u32x16::splat(`16`));
2128	transmute(src:simd_cast::<u32x16, u16x16>(r))
2129	}
2130
2131	/// Multiplies the packed 16-bit integers in `a` and `b`, producing
2132	/// intermediate 32-bit integers, and returns the low 16 bits of the
2133	/// intermediate integers
2134	///
2135	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi16)
2136	#[inline]
2137	#[target_feature(enable = "avx2")]
2138	#[cfg_attr(test, assert_instr(vpmullw))]
2139	#[stable(feature = "simd_x86", since = "1.27.0")]
2140	pub unsafe fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
2141	transmute(src:simd_mul(x:a.as_i16x16(), y:b.as_i16x16()))
2142	}
2143
2144	/// Multiplies the packed 32-bit integers in `a` and `b`, producing
2145	/// intermediate 64-bit integers, and returns the low 32 bits of the
2146	/// intermediate integers
2147	///
2148	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi32)
2149	#[inline]
2150	#[target_feature(enable = "avx2")]
2151	#[cfg_attr(test, assert_instr(vpmulld))]
2152	#[stable(feature = "simd_x86", since = "1.27.0")]
2153	pub unsafe fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
2154	transmute(src:simd_mul(x:a.as_i32x8(), y:b.as_i32x8()))
2155	}
2156
2157	/// Multiplies packed 16-bit integers in `a` and `b`, producing
2158	/// intermediate signed 32-bit integers. Truncate each intermediate
2159	/// integer to the 18 most significant bits, round by adding 1, and
2160	/// return bits `[16:1]`.
2161	///
2162	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhrs_epi16)
2163	#[inline]
2164	#[target_feature(enable = "avx2")]
2165	#[cfg_attr(test, assert_instr(vpmulhrsw))]
2166	#[stable(feature = "simd_x86", since = "1.27.0")]
2167	pub unsafe fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
2168	transmute(src:pmulhrsw(a:a.as_i16x16(), b:b.as_i16x16()))
2169	}
2170
2171	/// Computes the bitwise OR of 256 bits (representing integer data) in `a`
2172	/// and `b`
2173	///
2174	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_si256)
2175	#[inline]
2176	#[target_feature(enable = "avx2")]
2177	#[cfg_attr(test, assert_instr(vorps))]
2178	#[stable(feature = "simd_x86", since = "1.27.0")]
2179	pub unsafe fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
2180	transmute(src:simd_or(x:a.as_i32x8(), y:b.as_i32x8()))
2181	}
2182
2183	/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
2184	/// using signed saturation
2185	///
2186	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi16)
2187	#[inline]
2188	#[target_feature(enable = "avx2")]
2189	#[cfg_attr(test, assert_instr(vpacksswb))]
2190	#[stable(feature = "simd_x86", since = "1.27.0")]
2191	pub unsafe fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
2192	transmute(src:packsswb(a:a.as_i16x16(), b:b.as_i16x16()))
2193	}
2194
2195	/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
2196	/// using signed saturation
2197	///
2198	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi32)
2199	#[inline]
2200	#[target_feature(enable = "avx2")]
2201	#[cfg_attr(test, assert_instr(vpackssdw))]
2202	#[stable(feature = "simd_x86", since = "1.27.0")]
2203	pub unsafe fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
2204	transmute(src:packssdw(a:a.as_i32x8(), b:b.as_i32x8()))
2205	}
2206
2207	/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
2208	/// using unsigned saturation
2209	///
2210	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi16)
2211	#[inline]
2212	#[target_feature(enable = "avx2")]
2213	#[cfg_attr(test, assert_instr(vpackuswb))]
2214	#[stable(feature = "simd_x86", since = "1.27.0")]
2215	pub unsafe fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
2216	transmute(src:packuswb(a:a.as_i16x16(), b:b.as_i16x16()))
2217	}
2218
2219	/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
2220	/// using unsigned saturation
2221	///
2222	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi32)
2223	#[inline]
2224	#[target_feature(enable = "avx2")]
2225	#[cfg_attr(test, assert_instr(vpackusdw))]
2226	#[stable(feature = "simd_x86", since = "1.27.0")]
2227	pub unsafe fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
2228	transmute(src:packusdw(a:a.as_i32x8(), b:b.as_i32x8()))
2229	}
2230
2231	/// Permutes packed 32-bit integers from `a` according to the content of `b`.
2232	///
2233	/// The last 3 bits of each integer of `b` are used as addresses into the 8
2234	/// integers of `a`.
2235	///
2236	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_epi32)
2237	#[inline]
2238	#[target_feature(enable = "avx2")]
2239	#[cfg_attr(test, assert_instr(vpermps))]
2240	#[stable(feature = "simd_x86", since = "1.27.0")]
2241	pub unsafe fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
2242	transmute(src:permd(a:a.as_u32x8(), b:b.as_u32x8()))
2243	}
2244
2245	/// Permutes 64-bit integers from `a` using control mask `imm8`.
2246	///
2247	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute4x64_epi64)
2248	#[inline]
2249	#[target_feature(enable = "avx2")]
2250	#[cfg_attr(test, assert_instr(vpermpd, IMM8 = `9`))]
2251	#[rustc_legacy_const_generics(`1`)]
2252	#[stable(feature = "simd_x86", since = "1.27.0")]
2253	pub unsafe fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2254	static_assert_uimm_bits!(IMM8, `8`);
2255	let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
2256	let r: i64x4 = simd_shuffle!(
2257	a.as_i64x4(),
2258	zero,
2259	[
2260	IMM8 as u32 & `0b11`,
2261	(IMM8 as u32 >> `2`) & `0b11`,
2262	(IMM8 as u32 >> `4`) & `0b11`,
2263	(IMM8 as u32 >> `6`) & `0b11`,
2264	],
2265	);
2266	transmute(src:r)
2267	}
2268
2269	/// Shuffles 128-bits of integer data selected by `imm8` from `a` and `b`.
2270	///
2271	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2x128_si256)
2272	#[inline]
2273	#[target_feature(enable = "avx2")]
2274	#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = `9`))]
2275	#[rustc_legacy_const_generics(`2`)]
2276	#[stable(feature = "simd_x86", since = "1.27.0")]
2277	pub unsafe fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2278	static_assert_uimm_bits!(IMM8, `8`);
2279	transmute(src:vperm2i128(a:a.as_i64x4(), b:b.as_i64x4(), IMM8 as i8))
2280	}
2281
2282	/// Shuffles 64-bit floating-point elements in `a` across lanes using the
2283	/// control in `imm8`.
2284	///
2285	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute4x64_pd)
2286	#[inline]
2287	#[target_feature(enable = "avx2")]
2288	#[cfg_attr(test, assert_instr(vpermpd, IMM8 = `1`))]
2289	#[rustc_legacy_const_generics(`1`)]
2290	#[stable(feature = "simd_x86", since = "1.27.0")]
2291	pub unsafe fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
2292	static_assert_uimm_bits!(IMM8, `8`);
2293	simd_shuffle!(
2294	a,
2295	_mm256_undefined_pd(),
2296	[
2297	IMM8 as u32 & `0b11`,
2298	(IMM8 as u32 >> `2`) & `0b11`,
2299	(IMM8 as u32 >> `4`) & `0b11`,
2300	(IMM8 as u32 >> `6`) & `0b11`,
2301	],
2302	)
2303	}
2304
2305	/// Shuffles eight 32-bit floating-point elements in `a` across lanes using
2306	/// the corresponding 32-bit integer index in `idx`.
2307	///
2308	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_ps)
2309	#[inline]
2310	#[target_feature(enable = "avx2")]
2311	#[cfg_attr(test, assert_instr(vpermps))]
2312	#[stable(feature = "simd_x86", since = "1.27.0")]
2313	pub unsafe fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 {
2314	permps(a, b:idx.as_i32x8())
2315	}
2316
2317	/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
2318	/// and `b`, then horizontally sum each consecutive 8 differences to
2319	/// produce four unsigned 16-bit integers, and pack these unsigned 16-bit
2320	/// integers in the low 16 bits of the 64-bit return value
2321	///
2322	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sad_epu8)
2323	#[inline]
2324	#[target_feature(enable = "avx2")]
2325	#[cfg_attr(test, assert_instr(vpsadbw))]
2326	#[stable(feature = "simd_x86", since = "1.27.0")]
2327	pub unsafe fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i {
2328	transmute(src:psadbw(a:a.as_u8x32(), b:b.as_u8x32()))
2329	}
2330
2331	/// Shuffles bytes from `a` according to the content of `b`.
2332	///
2333	/// For each of the 128-bit low and high halves of the vectors, the last
2334	/// 4 bits of each byte of `b` are used as addresses into the respective
2335	/// low or high 16 bytes of `a`. That is, the halves are shuffled separately.
2336	///
2337	/// In addition, if the highest significant bit of a byte of `b` is set, the
2338	/// respective destination byte is set to 0.
2339	///
2340	/// Picturing `a` and `b` as `[u8; 32]`, `_mm256_shuffle_epi8` is logically
2341	/// equivalent to:
2342	///
2343	/// ```
2344	/// fn mm256_shuffle_epi8(a: [u8; `32`], b: [u8; `32`]) -> [u8; `32`] {
2345	/// let mut r = [`0`; `32`];
2346	/// for i in `0`..`16` {
2347	/// // if the most significant bit of b is set,
2348	/// // then the destination byte is set to 0.
2349	/// if b[i] & `0x80` == `0u8` {
2350	/// r[i] = a[(b[i] % `16`) as usize];
2351	/// }
2352	/// if b[i + `16`] & `0x80` == `0u8` {
2353	/// r[i + `16`] = a[(b[i + `16`] % `16` + `16`) as usize];
2354	/// }
2355	/// }
2356	/// r
2357	/// }
2358	/// ```
2359	///
2360	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_epi8)
2361	#[inline]
2362	#[target_feature(enable = "avx2")]
2363	#[cfg_attr(test, assert_instr(vpshufb))]
2364	#[stable(feature = "simd_x86", since = "1.27.0")]
2365	pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
2366	transmute(src:pshufb(a:a.as_u8x32(), b:b.as_u8x32()))
2367	}
2368
2369	/// Shuffles 32-bit integers in 128-bit lanes of `a` using the control in
2370	/// `imm8`.
2371	///
2372	/// ```rust
2373	/// #[cfg(target_arch = "x86")]
2374	/// use std::arch::x86::*;
2375	/// #[cfg(target_arch = "x86_64")]
2376	/// use std::arch::x86_64::*;
2377	///
2378	/// # fn main() {
2379	/// # if is_x86_feature_detected!("avx2") {
2380	/// # #[target_feature(enable = "avx2")]
2381	/// # unsafe fn worker() {
2382	/// let a = _mm256_setr_epi32(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`);
2383	///
2384	/// let c1 = _mm256_shuffle_epi32(a, `0b00_11_10_01`);
2385	/// let c2 = _mm256_shuffle_epi32(a, `0b01_00_10_11`);
2386	///
2387	/// let expected1 = _mm256_setr_epi32(`1`, `2`, `3`, `0`, `5`, `6`, `7`, `4`);
2388	/// let expected2 = _mm256_setr_epi32(`3`, `2`, `0`, `1`, `7`, `6`, `4`, `5`);
2389	///
2390	/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c1, expected1)), !`0`);
2391	/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c2, expected2)), !`0`);
2392	/// # }
2393	/// # unsafe { worker(); }
2394	/// # }
2395	/// # }
2396	/// ```
2397	///
2398	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_epi32)
2399	#[inline]
2400	#[target_feature(enable = "avx2")]
2401	#[cfg_attr(test, assert_instr(vshufps, MASK = `9`))]
2402	#[rustc_legacy_const_generics(`1`)]
2403	#[stable(feature = "simd_x86", since = "1.27.0")]
2404	pub unsafe fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
2405	static_assert_uimm_bits!(MASK, `8`);
2406	let r: i32x8 = simd_shuffle!(
2407	a.as_i32x8(),
2408	a.as_i32x8(),
2409	[
2410	MASK as u32 & `0b11`,
2411	(MASK as u32 >> `2`) & `0b11`,
2412	(MASK as u32 >> `4`) & `0b11`,
2413	(MASK as u32 >> `6`) & `0b11`,
2414	(MASK as u32 & `0b11`) + `4`,
2415	((MASK as u32 >> `2`) & `0b11`) + `4`,
2416	((MASK as u32 >> `4`) & `0b11`) + `4`,
2417	((MASK as u32 >> `6`) & `0b11`) + `4`,
2418	],
2419	);
2420	transmute(src:r)
2421	}
2422
2423	/// Shuffles 16-bit integers in the high 64 bits of 128-bit lanes of `a` using
2424	/// the control in `imm8`. The low 64 bits of 128-bit lanes of `a` are copied
2425	/// to the output.
2426	///
2427	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shufflehi_epi16)
2428	#[inline]
2429	#[target_feature(enable = "avx2")]
2430	#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = `9`))]
2431	#[rustc_legacy_const_generics(`1`)]
2432	#[stable(feature = "simd_x86", since = "1.27.0")]
2433	pub unsafe fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2434	static_assert_uimm_bits!(IMM8, `8`);
2435	let a = a.as_i16x16();
2436	let r: i16x16 = simd_shuffle!(
2437	a,
2438	a,
2439	[
2440	`0`,
2441	`1`,
2442	`2`,
2443	`3`,
2444	`4` + (IMM8 as u32 & `0b11`),
2445	`4` + ((IMM8 as u32 >> `2`) & `0b11`),
2446	`4` + ((IMM8 as u32 >> `4`) & `0b11`),
2447	`4` + ((IMM8 as u32 >> `6`) & `0b11`),
2448	`8`,
2449	`9`,
2450	`10`,
2451	`11`,
2452	`12` + (IMM8 as u32 & `0b11`),
2453	`12` + ((IMM8 as u32 >> `2`) & `0b11`),
2454	`12` + ((IMM8 as u32 >> `4`) & `0b11`),
2455	`12` + ((IMM8 as u32 >> `6`) & `0b11`),
2456	],
2457	);
2458	transmute(r)
2459	}
2460
2461	/// Shuffles 16-bit integers in the low 64 bits of 128-bit lanes of `a` using
2462	/// the control in `imm8`. The high 64 bits of 128-bit lanes of `a` are copied
2463	/// to the output.
2464	///
2465	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shufflelo_epi16)
2466	#[inline]
2467	#[target_feature(enable = "avx2")]
2468	#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = `9`))]
2469	#[rustc_legacy_const_generics(`1`)]
2470	#[stable(feature = "simd_x86", since = "1.27.0")]
2471	pub unsafe fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2472	static_assert_uimm_bits!(IMM8, `8`);
2473	let a = a.as_i16x16();
2474	let r: i16x16 = simd_shuffle!(
2475	a,
2476	a,
2477	[
2478	`0` + (IMM8 as u32 & `0b11`),
2479	`0` + ((IMM8 as u32 >> `2`) & `0b11`),
2480	`0` + ((IMM8 as u32 >> `4`) & `0b11`),
2481	`0` + ((IMM8 as u32 >> `6`) & `0b11`),
2482	`4`,
2483	`5`,
2484	`6`,
2485	`7`,
2486	`8` + (IMM8 as u32 & `0b11`),
2487	`8` + ((IMM8 as u32 >> `2`) & `0b11`),
2488	`8` + ((IMM8 as u32 >> `4`) & `0b11`),
2489	`8` + ((IMM8 as u32 >> `6`) & `0b11`),
2490	`12`,
2491	`13`,
2492	`14`,
2493	`15`,
2494	],
2495	);
2496	transmute(r)
2497	}
2498
2499	/// Negates packed 16-bit integers in `a` when the corresponding signed
2500	/// 16-bit integer in `b` is negative, and returns the results.
2501	/// Results are zeroed out when the corresponding element in `b` is zero.
2502	///
2503	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi16)
2504	#[inline]
2505	#[target_feature(enable = "avx2")]
2506	#[cfg_attr(test, assert_instr(vpsignw))]
2507	#[stable(feature = "simd_x86", since = "1.27.0")]
2508	pub unsafe fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i {
2509	transmute(src:psignw(a:a.as_i16x16(), b:b.as_i16x16()))
2510	}
2511
2512	/// Negates packed 32-bit integers in `a` when the corresponding signed
2513	/// 32-bit integer in `b` is negative, and returns the results.
2514	/// Results are zeroed out when the corresponding element in `b` is zero.
2515	///
2516	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi32)
2517	#[inline]
2518	#[target_feature(enable = "avx2")]
2519	#[cfg_attr(test, assert_instr(vpsignd))]
2520	#[stable(feature = "simd_x86", since = "1.27.0")]
2521	pub unsafe fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i {
2522	transmute(src:psignd(a:a.as_i32x8(), b:b.as_i32x8()))
2523	}
2524
2525	/// Negates packed 8-bit integers in `a` when the corresponding signed
2526	/// 8-bit integer in `b` is negative, and returns the results.
2527	/// Results are zeroed out when the corresponding element in `b` is zero.
2528	///
2529	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi8)
2530	#[inline]
2531	#[target_feature(enable = "avx2")]
2532	#[cfg_attr(test, assert_instr(vpsignb))]
2533	#[stable(feature = "simd_x86", since = "1.27.0")]
2534	pub unsafe fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i {
2535	transmute(src:psignb(a:a.as_i8x32(), b:b.as_i8x32()))
2536	}
2537
2538	/// Shifts packed 16-bit integers in `a` left by `count` while
2539	/// shifting in zeros, and returns the result
2540	///
2541	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi16)
2542	#[inline]
2543	#[target_feature(enable = "avx2")]
2544	#[cfg_attr(test, assert_instr(vpsllw))]
2545	#[stable(feature = "simd_x86", since = "1.27.0")]
2546	pub unsafe fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
2547	transmute(src:psllw(a:a.as_i16x16(), count:count.as_i16x8()))
2548	}
2549
2550	/// Shifts packed 32-bit integers in `a` left by `count` while
2551	/// shifting in zeros, and returns the result
2552	///
2553	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi32)
2554	#[inline]
2555	#[target_feature(enable = "avx2")]
2556	#[cfg_attr(test, assert_instr(vpslld))]
2557	#[stable(feature = "simd_x86", since = "1.27.0")]
2558	pub unsafe fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
2559	transmute(src:pslld(a:a.as_i32x8(), count:count.as_i32x4()))
2560	}
2561
2562	/// Shifts packed 64-bit integers in `a` left by `count` while
2563	/// shifting in zeros, and returns the result
2564	///
2565	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi64)
2566	#[inline]
2567	#[target_feature(enable = "avx2")]
2568	#[cfg_attr(test, assert_instr(vpsllq))]
2569	#[stable(feature = "simd_x86", since = "1.27.0")]
2570	pub unsafe fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
2571	transmute(src:psllq(a:a.as_i64x4(), count:count.as_i64x2()))
2572	}
2573
2574	/// Shifts packed 16-bit integers in `a` left by `IMM8` while
2575	/// shifting in zeros, return the results;
2576	///
2577	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi16)
2578	#[inline]
2579	#[target_feature(enable = "avx2")]
2580	#[cfg_attr(test, assert_instr(vpsllw, IMM8 = `7`))]
2581	#[rustc_legacy_const_generics(`1`)]
2582	#[stable(feature = "simd_x86", since = "1.27.0")]
2583	pub unsafe fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2584	static_assert_uimm_bits!(IMM8, `8`);
2585	if IMM8 >= `16` {
2586	_mm256_setzero_si256()
2587	} else {
2588	transmute(src:simd_shl(lhs:a.as_u16x16(), rhs:u16x16::splat(IMM8 as u16)))
2589	}
2590	}
2591
2592	/// Shifts packed 32-bit integers in `a` left by `IMM8` while
2593	/// shifting in zeros, return the results;
2594	///
2595	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi32)
2596	#[inline]
2597	#[target_feature(enable = "avx2")]
2598	#[cfg_attr(test, assert_instr(vpslld, IMM8 = `7`))]
2599	#[rustc_legacy_const_generics(`1`)]
2600	#[stable(feature = "simd_x86", since = "1.27.0")]
2601	pub unsafe fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2602	static_assert_uimm_bits!(IMM8, `8`);
2603	if IMM8 >= `32` {
2604	_mm256_setzero_si256()
2605	} else {
2606	transmute(src:simd_shl(lhs:a.as_u32x8(), rhs:u32x8::splat(IMM8 as u32)))
2607	}
2608	}
2609
2610	/// Shifts packed 64-bit integers in `a` left by `IMM8` while
2611	/// shifting in zeros, return the results;
2612	///
2613	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi64)
2614	#[inline]
2615	#[target_feature(enable = "avx2")]
2616	#[cfg_attr(test, assert_instr(vpsllq, IMM8 = `7`))]
2617	#[rustc_legacy_const_generics(`1`)]
2618	#[stable(feature = "simd_x86", since = "1.27.0")]
2619	pub unsafe fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2620	static_assert_uimm_bits!(IMM8, `8`);
2621	if IMM8 >= `64` {
2622	_mm256_setzero_si256()
2623	} else {
2624	transmute(src:simd_shl(lhs:a.as_u64x4(), rhs:u64x4::splat(IMM8 as u64)))
2625	}
2626	}
2627
2628	/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
2629	///
2630	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_si256)
2631	#[inline]
2632	#[target_feature(enable = "avx2")]
2633	#[cfg_attr(test, assert_instr(vpslldq, IMM8 = `3`))]
2634	#[rustc_legacy_const_generics(`1`)]
2635	#[stable(feature = "simd_x86", since = "1.27.0")]
2636	pub unsafe fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2637	static_assert_uimm_bits!(IMM8, `8`);
2638	_mm256_bslli_epi128::<IMM8>(a)
2639	}
2640
2641	/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
2642	///
2643	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bslli_epi128)
2644	#[inline]
2645	#[target_feature(enable = "avx2")]
2646	#[cfg_attr(test, assert_instr(vpslldq, IMM8 = `3`))]
2647	#[rustc_legacy_const_generics(`1`)]
2648	#[stable(feature = "simd_x86", since = "1.27.0")]
2649	pub unsafe fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2650	static_assert_uimm_bits!(IMM8, `8`);
2651	const fn mask(shift: i32, i: u32) -> u32 {
2652	let shift = shift as u32 & `0xff`;
2653	if shift > `15` \|\| i % `16` < shift {
2654	`0`
2655	} else {
2656	`32` + (i - shift)
2657	}
2658	}
2659	let a = a.as_i8x32();
2660	let zero = _mm256_setzero_si256().as_i8x32();
2661	let r: i8x32 = simd_shuffle!(
2662	zero,
2663	a,
2664	[
2665	mask(IMM8, `0`),
2666	mask(IMM8, `1`),
2667	mask(IMM8, `2`),
2668	mask(IMM8, `3`),
2669	mask(IMM8, `4`),
2670	mask(IMM8, `5`),
2671	mask(IMM8, `6`),
2672	mask(IMM8, `7`),
2673	mask(IMM8, `8`),
2674	mask(IMM8, `9`),
2675	mask(IMM8, `10`),
2676	mask(IMM8, `11`),
2677	mask(IMM8, `12`),
2678	mask(IMM8, `13`),
2679	mask(IMM8, `14`),
2680	mask(IMM8, `15`),
2681	mask(IMM8, `16`),
2682	mask(IMM8, `17`),
2683	mask(IMM8, `18`),
2684	mask(IMM8, `19`),
2685	mask(IMM8, `20`),
2686	mask(IMM8, `21`),
2687	mask(IMM8, `22`),
2688	mask(IMM8, `23`),
2689	mask(IMM8, `24`),
2690	mask(IMM8, `25`),
2691	mask(IMM8, `26`),
2692	mask(IMM8, `27`),
2693	mask(IMM8, `28`),
2694	mask(IMM8, `29`),
2695	mask(IMM8, `30`),
2696	mask(IMM8, `31`),
2697	],
2698	);
2699	transmute(r)
2700	}
2701
2702	/// Shifts packed 32-bit integers in `a` left by the amount
2703	/// specified by the corresponding element in `count` while
2704	/// shifting in zeros, and returns the result.
2705	///
2706	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi32)
2707	#[inline]
2708	#[target_feature(enable = "avx2")]
2709	#[cfg_attr(test, assert_instr(vpsllvd))]
2710	#[stable(feature = "simd_x86", since = "1.27.0")]
2711	pub unsafe fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
2712	transmute(src:psllvd(a:a.as_i32x4(), count:count.as_i32x4()))
2713	}
2714
2715	/// Shifts packed 32-bit integers in `a` left by the amount
2716	/// specified by the corresponding element in `count` while
2717	/// shifting in zeros, and returns the result.
2718	///
2719	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi32)
2720	#[inline]
2721	#[target_feature(enable = "avx2")]
2722	#[cfg_attr(test, assert_instr(vpsllvd))]
2723	#[stable(feature = "simd_x86", since = "1.27.0")]
2724	pub unsafe fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
2725	transmute(src:psllvd256(a:a.as_i32x8(), count:count.as_i32x8()))
2726	}
2727
2728	/// Shifts packed 64-bit integers in `a` left by the amount
2729	/// specified by the corresponding element in `count` while
2730	/// shifting in zeros, and returns the result.
2731	///
2732	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi64)
2733	#[inline]
2734	#[target_feature(enable = "avx2")]
2735	#[cfg_attr(test, assert_instr(vpsllvq))]
2736	#[stable(feature = "simd_x86", since = "1.27.0")]
2737	pub unsafe fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
2738	transmute(src:psllvq(a:a.as_i64x2(), count:count.as_i64x2()))
2739	}
2740
2741	/// Shifts packed 64-bit integers in `a` left by the amount
2742	/// specified by the corresponding element in `count` while
2743	/// shifting in zeros, and returns the result.
2744	///
2745	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi64)
2746	#[inline]
2747	#[target_feature(enable = "avx2")]
2748	#[cfg_attr(test, assert_instr(vpsllvq))]
2749	#[stable(feature = "simd_x86", since = "1.27.0")]
2750	pub unsafe fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
2751	transmute(src:psllvq256(a:a.as_i64x4(), count:count.as_i64x4()))
2752	}
2753
2754	/// Shifts packed 16-bit integers in `a` right by `count` while
2755	/// shifting in sign bits.
2756	///
2757	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi16)
2758	#[inline]
2759	#[target_feature(enable = "avx2")]
2760	#[cfg_attr(test, assert_instr(vpsraw))]
2761	#[stable(feature = "simd_x86", since = "1.27.0")]
2762	pub unsafe fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
2763	transmute(src:psraw(a:a.as_i16x16(), count:count.as_i16x8()))
2764	}
2765
2766	/// Shifts packed 32-bit integers in `a` right by `count` while
2767	/// shifting in sign bits.
2768	///
2769	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi32)
2770	#[inline]
2771	#[target_feature(enable = "avx2")]
2772	#[cfg_attr(test, assert_instr(vpsrad))]
2773	#[stable(feature = "simd_x86", since = "1.27.0")]
2774	pub unsafe fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
2775	transmute(src:psrad(a:a.as_i32x8(), count:count.as_i32x4()))
2776	}
2777
2778	/// Shifts packed 16-bit integers in `a` right by `IMM8` while
2779	/// shifting in sign bits.
2780	///
2781	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi16)
2782	#[inline]
2783	#[target_feature(enable = "avx2")]
2784	#[cfg_attr(test, assert_instr(vpsraw, IMM8 = `7`))]
2785	#[rustc_legacy_const_generics(`1`)]
2786	#[stable(feature = "simd_x86", since = "1.27.0")]
2787	pub unsafe fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2788	static_assert_uimm_bits!(IMM8, `8`);
2789	transmute(src:simd_shr(lhs:a.as_i16x16(), rhs:i16x16::splat(IMM8.min(`15`) as i16)))
2790	}
2791
2792	/// Shifts packed 32-bit integers in `a` right by `IMM8` while
2793	/// shifting in sign bits.
2794	///
2795	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi32)
2796	#[inline]
2797	#[target_feature(enable = "avx2")]
2798	#[cfg_attr(test, assert_instr(vpsrad, IMM8 = `7`))]
2799	#[rustc_legacy_const_generics(`1`)]
2800	#[stable(feature = "simd_x86", since = "1.27.0")]
2801	pub unsafe fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2802	static_assert_uimm_bits!(IMM8, `8`);
2803	transmute(src:simd_shr(lhs:a.as_i32x8(), rhs:i32x8::splat(IMM8.min(`31`))))
2804	}
2805
2806	/// Shifts packed 32-bit integers in `a` right by the amount specified by the
2807	/// corresponding element in `count` while shifting in sign bits.
2808	///
2809	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi32)
2810	#[inline]
2811	#[target_feature(enable = "avx2")]
2812	#[cfg_attr(test, assert_instr(vpsravd))]
2813	#[stable(feature = "simd_x86", since = "1.27.0")]
2814	pub unsafe fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
2815	transmute(src:psravd(a:a.as_i32x4(), count:count.as_i32x4()))
2816	}
2817
2818	/// Shifts packed 32-bit integers in `a` right by the amount specified by the
2819	/// corresponding element in `count` while shifting in sign bits.
2820	///
2821	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi32)
2822	#[inline]
2823	#[target_feature(enable = "avx2")]
2824	#[cfg_attr(test, assert_instr(vpsravd))]
2825	#[stable(feature = "simd_x86", since = "1.27.0")]
2826	pub unsafe fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
2827	transmute(src:psravd256(a:a.as_i32x8(), count:count.as_i32x8()))
2828	}
2829
2830	/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
2831	///
2832	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_si256)
2833	#[inline]
2834	#[target_feature(enable = "avx2")]
2835	#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = `1`))]
2836	#[rustc_legacy_const_generics(`1`)]
2837	#[stable(feature = "simd_x86", since = "1.27.0")]
2838	pub unsafe fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2839	static_assert_uimm_bits!(IMM8, `8`);
2840	_mm256_bsrli_epi128::<IMM8>(a)
2841	}
2842
2843	/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
2844	///
2845	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bsrli_epi128)
2846	#[inline]
2847	#[target_feature(enable = "avx2")]
2848	#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = `1`))]
2849	#[rustc_legacy_const_generics(`1`)]
2850	#[stable(feature = "simd_x86", since = "1.27.0")]
2851	pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2852	static_assert_uimm_bits!(IMM8, `8`);
2853	let a = a.as_i8x32();
2854	let zero = _mm256_setzero_si256().as_i8x32();
2855	let r: i8x32 = match IMM8 % `16` {
2856	`0` => simd_shuffle!(
2857	a,
2858	zero,
2859	[
2860	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`,
2861	`23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
2862	],
2863	),
2864	`1` => simd_shuffle!(
2865	a,
2866	zero,
2867	[
2868	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `17`, `18`, `19`, `20`, `21`, `22`, `23`,
2869	`24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
2870	],
2871	),
2872	`2` => simd_shuffle!(
2873	a,
2874	zero,
2875	[
2876	`2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `32`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
2877	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`, `32`,
2878	],
2879	),
2880	`3` => simd_shuffle!(
2881	a,
2882	zero,
2883	[
2884	`3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `32`, `32`, `19`, `20`, `21`, `22`, `23`, `24`,
2885	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`, `32`, `32`,
2886	],
2887	),
2888	`4` => simd_shuffle!(
2889	a,
2890	zero,
2891	[
2892	`4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `32`, `32`, `32`, `20`, `21`, `22`, `23`, `24`, `25`,
2893	`26`, `27`, `28`, `29`, `30`, `31`, `32`, `32`, `32`, `32`,
2894	],
2895	),
2896	`5` => simd_shuffle!(
2897	a,
2898	zero,
2899	[
2900	`5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `32`, `32`, `32`, `32`, `21`, `22`, `23`, `24`, `25`, `26`,
2901	`27`, `28`, `29`, `30`, `31`, `32`, `32`, `32`, `32`, `32`,
2902	],
2903	),
2904	`6` => simd_shuffle!(
2905	a,
2906	zero,
2907	[
2908	`6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `32`, `32`, `32`, `32`, `32`, `22`, `23`, `24`, `25`, `26`, `27`,
2909	`28`, `29`, `30`, `31`, `32`, `32`, `32`, `32`, `32`, `32`,
2910	],
2911	),
2912	`7` => simd_shuffle!(
2913	a,
2914	zero,
2915	[
2916	`7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `23`, `24`, `25`, `26`, `27`,
2917	`28`, `29`, `30`, `31`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
2918	],
2919	),
2920	`8` => simd_shuffle!(
2921	a,
2922	zero,
2923	[
2924	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `24`, `25`, `26`, `27`, `28`,
2925	`29`, `30`, `31`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
2926	],
2927	),
2928	`9` => simd_shuffle!(
2929	a,
2930	zero,
2931	[
2932	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `25`, `26`, `27`, `28`, `29`,
2933	`30`, `31`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
2934	],
2935	),
2936	`10` => simd_shuffle!(
2937	a,
2938	zero,
2939	[
2940	`10`, `11`, `12`, `13`, `14`, `15`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `26`, `27`, `28`, `29`, `30`,
2941	`31`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
2942	],
2943	),
2944	`11` => simd_shuffle!(
2945	a,
2946	zero,
2947	[
2948	`11`, `12`, `13`, `14`, `15`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `27`, `28`, `29`, `30`, `31`,
2949	`32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
2950	],
2951	),
2952	`12` => simd_shuffle!(
2953	a,
2954	zero,
2955	[
2956	`12`, `13`, `14`, `15`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `28`, `29`, `30`, `31`, `32`,
2957	`32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
2958	],
2959	),
2960	`13` => simd_shuffle!(
2961	a,
2962	zero,
2963	[
2964	`13`, `14`, `15`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `29`, `30`, `31`, `32`, `32`,
2965	`32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
2966	],
2967	),
2968	`14` => simd_shuffle!(
2969	a,
2970	zero,
2971	[
2972	`14`, `15`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `30`, `31`, `32`, `32`, `32`,
2973	`32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
2974	],
2975	),
2976	`15` => simd_shuffle!(
2977	a,
2978	zero,
2979	[
2980	`14`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `31`, `32`, `32`, `32`, `32`,
2981	`32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
2982	],
2983	),
2984	_ => zero,
2985	};
2986	transmute(r)
2987	}
2988
2989	/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
2990	/// zeros.
2991	///
2992	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi16)
2993	#[inline]
2994	#[target_feature(enable = "avx2")]
2995	#[cfg_attr(test, assert_instr(vpsrlw))]
2996	#[stable(feature = "simd_x86", since = "1.27.0")]
2997	pub unsafe fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
2998	transmute(src:psrlw(a:a.as_i16x16(), count:count.as_i16x8()))
2999	}
3000
3001	/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
3002	/// zeros.
3003	///
3004	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi32)
3005	#[inline]
3006	#[target_feature(enable = "avx2")]
3007	#[cfg_attr(test, assert_instr(vpsrld))]
3008	#[stable(feature = "simd_x86", since = "1.27.0")]
3009	pub unsafe fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
3010	transmute(src:psrld(a:a.as_i32x8(), count:count.as_i32x4()))
3011	}
3012
3013	/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
3014	/// zeros.
3015	///
3016	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi64)
3017	#[inline]
3018	#[target_feature(enable = "avx2")]
3019	#[cfg_attr(test, assert_instr(vpsrlq))]
3020	#[stable(feature = "simd_x86", since = "1.27.0")]
3021	pub unsafe fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
3022	transmute(src:psrlq(a:a.as_i64x4(), count:count.as_i64x2()))
3023	}
3024
3025	/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
3026	/// zeros
3027	///
3028	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi16)
3029	#[inline]
3030	#[target_feature(enable = "avx2")]
3031	#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = `7`))]
3032	#[rustc_legacy_const_generics(`1`)]
3033	#[stable(feature = "simd_x86", since = "1.27.0")]
3034	pub unsafe fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
3035	static_assert_uimm_bits!(IMM8, `8`);
3036	if IMM8 >= `16` {
3037	_mm256_setzero_si256()
3038	} else {
3039	transmute(src:simd_shr(lhs:a.as_u16x16(), rhs:u16x16::splat(IMM8 as u16)))
3040	}
3041	}
3042
3043	/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
3044	/// zeros
3045	///
3046	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi32)
3047	#[inline]
3048	#[target_feature(enable = "avx2")]
3049	#[cfg_attr(test, assert_instr(vpsrld, IMM8 = `7`))]
3050	#[rustc_legacy_const_generics(`1`)]
3051	#[stable(feature = "simd_x86", since = "1.27.0")]
3052	pub unsafe fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
3053	static_assert_uimm_bits!(IMM8, `8`);
3054	if IMM8 >= `32` {
3055	_mm256_setzero_si256()
3056	} else {
3057	transmute(src:simd_shr(lhs:a.as_u32x8(), rhs:u32x8::splat(IMM8 as u32)))
3058	}
3059	}
3060
3061	/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
3062	/// zeros
3063	///
3064	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi64)
3065	#[inline]
3066	#[target_feature(enable = "avx2")]
3067	#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = `7`))]
3068	#[rustc_legacy_const_generics(`1`)]
3069	#[stable(feature = "simd_x86", since = "1.27.0")]
3070	pub unsafe fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
3071	static_assert_uimm_bits!(IMM8, `8`);
3072	if IMM8 >= `64` {
3073	_mm256_setzero_si256()
3074	} else {
3075	transmute(src:simd_shr(lhs:a.as_u64x4(), rhs:u64x4::splat(IMM8 as u64)))
3076	}
3077	}
3078
3079	/// Shifts packed 32-bit integers in `a` right by the amount specified by
3080	/// the corresponding element in `count` while shifting in zeros,
3081	///
3082	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi32)
3083	#[inline]
3084	#[target_feature(enable = "avx2")]
3085	#[cfg_attr(test, assert_instr(vpsrlvd))]
3086	#[stable(feature = "simd_x86", since = "1.27.0")]
3087	pub unsafe fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
3088	transmute(src:psrlvd(a:a.as_i32x4(), count:count.as_i32x4()))
3089	}
3090
3091	/// Shifts packed 32-bit integers in `a` right by the amount specified by
3092	/// the corresponding element in `count` while shifting in zeros,
3093	///
3094	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi32)
3095	#[inline]
3096	#[target_feature(enable = "avx2")]
3097	#[cfg_attr(test, assert_instr(vpsrlvd))]
3098	#[stable(feature = "simd_x86", since = "1.27.0")]
3099	pub unsafe fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
3100	transmute(src:psrlvd256(a:a.as_i32x8(), count:count.as_i32x8()))
3101	}
3102
3103	/// Shifts packed 64-bit integers in `a` right by the amount specified by
3104	/// the corresponding element in `count` while shifting in zeros,
3105	///
3106	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi64)
3107	#[inline]
3108	#[target_feature(enable = "avx2")]
3109	#[cfg_attr(test, assert_instr(vpsrlvq))]
3110	#[stable(feature = "simd_x86", since = "1.27.0")]
3111	pub unsafe fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
3112	transmute(src:psrlvq(a:a.as_i64x2(), count:count.as_i64x2()))
3113	}
3114
3115	/// Shifts packed 64-bit integers in `a` right by the amount specified by
3116	/// the corresponding element in `count` while shifting in zeros,
3117	///
3118	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi64)
3119	#[inline]
3120	#[target_feature(enable = "avx2")]
3121	#[cfg_attr(test, assert_instr(vpsrlvq))]
3122	#[stable(feature = "simd_x86", since = "1.27.0")]
3123	pub unsafe fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
3124	transmute(src:psrlvq256(a:a.as_i64x4(), count:count.as_i64x4()))
3125	}
3126
// TODO _mm256_stream_load_si256 (__m256i const* mem_addr)
3128
3129	/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
3130	///
3131	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi16)
3132	#[inline]
3133	#[target_feature(enable = "avx2")]
3134	#[cfg_attr(test, assert_instr(vpsubw))]
3135	#[stable(feature = "simd_x86", since = "1.27.0")]
3136	pub unsafe fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
3137	transmute(src:simd_sub(lhs:a.as_i16x16(), rhs:b.as_i16x16()))
3138	}
3139
3140	/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`
3141	///
3142	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi32)
3143	#[inline]
3144	#[target_feature(enable = "avx2")]
3145	#[cfg_attr(test, assert_instr(vpsubd))]
3146	#[stable(feature = "simd_x86", since = "1.27.0")]
3147	pub unsafe fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
3148	transmute(src:simd_sub(lhs:a.as_i32x8(), rhs:b.as_i32x8()))
3149	}
3150
3151	/// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`
3152	///
3153	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi64)
3154	#[inline]
3155	#[target_feature(enable = "avx2")]
3156	#[cfg_attr(test, assert_instr(vpsubq))]
3157	#[stable(feature = "simd_x86", since = "1.27.0")]
3158	pub unsafe fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
3159	transmute(src:simd_sub(lhs:a.as_i64x4(), rhs:b.as_i64x4()))
3160	}
3161
3162	/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
3163	///
3164	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi8)
3165	#[inline]
3166	#[target_feature(enable = "avx2")]
3167	#[cfg_attr(test, assert_instr(vpsubb))]
3168	#[stable(feature = "simd_x86", since = "1.27.0")]
3169	pub unsafe fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
3170	transmute(src:simd_sub(lhs:a.as_i8x32(), rhs:b.as_i8x32()))
3171	}
3172
3173	/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in
3174	/// `a` using saturation.
3175	///
3176	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epi16)
3177	#[inline]
3178	#[target_feature(enable = "avx2")]
3179	#[cfg_attr(test, assert_instr(vpsubsw))]
3180	#[stable(feature = "simd_x86", since = "1.27.0")]
3181	pub unsafe fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
3182	transmute(src:simd_saturating_sub(lhs:a.as_i16x16(), rhs:b.as_i16x16()))
3183	}
3184
3185	/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in
3186	/// `a` using saturation.
3187	///
3188	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epi8)
3189	#[inline]
3190	#[target_feature(enable = "avx2")]
3191	#[cfg_attr(test, assert_instr(vpsubsb))]
3192	#[stable(feature = "simd_x86", since = "1.27.0")]
3193	pub unsafe fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
3194	transmute(src:simd_saturating_sub(lhs:a.as_i8x32(), rhs:b.as_i8x32()))
3195	}
3196
3197	/// Subtract packed unsigned 16-bit integers in `b` from packed 16-bit
3198	/// integers in `a` using saturation.
3199	///
3200	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epu16)
3201	#[inline]
3202	#[target_feature(enable = "avx2")]
3203	#[cfg_attr(test, assert_instr(vpsubusw))]
3204	#[stable(feature = "simd_x86", since = "1.27.0")]
3205	pub unsafe fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
3206	transmute(src:simd_saturating_sub(lhs:a.as_u16x16(), rhs:b.as_u16x16()))
3207	}
3208
3209	/// Subtract packed unsigned 8-bit integers in `b` from packed 8-bit
3210	/// integers in `a` using saturation.
3211	///
3212	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epu8)
3213	#[inline]
3214	#[target_feature(enable = "avx2")]
3215	#[cfg_attr(test, assert_instr(vpsubusb))]
3216	#[stable(feature = "simd_x86", since = "1.27.0")]
3217	pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
3218	transmute(src:simd_saturating_sub(lhs:a.as_u8x32(), rhs:b.as_u8x32()))
3219	}
3220
3221	/// Unpacks and interleave 8-bit integers from the high half of each
3222	/// 128-bit lane in `a` and `b`.
3223	///
3224	/// ```rust
3225	/// #[cfg(target_arch = "x86")]
3226	/// use std::arch::x86::*;
3227	/// #[cfg(target_arch = "x86_64")]
3228	/// use std::arch::x86_64::*;
3229	///
3230	/// # fn main() {
3231	/// # if is_x86_feature_detected!("avx2") {
3232	/// # #[target_feature(enable = "avx2")]
3233	/// # unsafe fn worker() {
3234	/// let a = _mm256_setr_epi8(
3235	/// `0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`,
3236	/// `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
3237	/// );
3238	/// let b = _mm256_setr_epi8(
3239	/// `0`, `-1`, `-2`, `-3`, `-4`, `-5`, `-6`, `-7`, `-8`, `-9`, `-10`, `-11`, `-12`, `-13`, `-14`, `-15`,
3240	/// `-16`, `-17`, `-18`, `-19`, `-20`, `-21`, `-22`, `-23`, `-24`, `-25`, `-26`, `-27`, `-28`, `-29`,
3241	/// `-30`, `-31`,
3242	/// );
3243	///
3244	/// let c = _mm256_unpackhi_epi8(a, b);
3245	///
3246	/// let expected = _mm256_setr_epi8(
3247	/// `8`, `-8`, `9`, `-9`, `10`, `-10`, `11`, `-11`, `12`, `-12`, `13`, `-13`, `14`, `-14`, `15`, `-15`,
3248	/// `24`, `-24`, `25`, `-25`, `26`, `-26`, `27`, `-27`, `28`, `-28`, `29`, `-29`, `30`, `-30`, `31`,
3249	/// `-31`,
3250	/// );
3251	/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !`0`);
3252	///
3253	/// # }
3254	/// # unsafe { worker(); }
3255	/// # }
3256	/// # }
3257	/// ```
3258	///
3259	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi8)
3260	#[inline]
3261	#[target_feature(enable = "avx2")]
3262	#[cfg_attr(test, assert_instr(vpunpckhbw))]
3263	#[stable(feature = "simd_x86", since = "1.27.0")]
3264	pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
3265	#[rustfmt::skip]
3266	let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3267	`8`, `40`, `9`, `41`, `10`, `42`, `11`, `43`,
3268	`12`, `44`, `13`, `45`, `14`, `46`, `15`, `47`,
3269	`24`, `56`, `25`, `57`, `26`, `58`, `27`, `59`,
3270	`28`, `60`, `29`, `61`, `30`, `62`, `31`, `63`,
3271	]);
3272	transmute(src:r)
3273	}
3274
3275	/// Unpacks and interleave 8-bit integers from the low half of each
3276	/// 128-bit lane of `a` and `b`.
3277	///
3278	/// ```rust
3279	/// #[cfg(target_arch = "x86")]
3280	/// use std::arch::x86::*;
3281	/// #[cfg(target_arch = "x86_64")]
3282	/// use std::arch::x86_64::*;
3283	///
3284	/// # fn main() {
3285	/// # if is_x86_feature_detected!("avx2") {
3286	/// # #[target_feature(enable = "avx2")]
3287	/// # unsafe fn worker() {
3288	/// let a = _mm256_setr_epi8(
3289	/// `0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`,
3290	/// `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
3291	/// );
3292	/// let b = _mm256_setr_epi8(
3293	/// `0`, `-1`, `-2`, `-3`, `-4`, `-5`, `-6`, `-7`, `-8`, `-9`, `-10`, `-11`, `-12`, `-13`, `-14`, `-15`,
3294	/// `-16`, `-17`, `-18`, `-19`, `-20`, `-21`, `-22`, `-23`, `-24`, `-25`, `-26`, `-27`, `-28`, `-29`,
3295	/// `-30`, `-31`,
3296	/// );
3297	///
3298	/// let c = _mm256_unpacklo_epi8(a, b);
3299	///
3300	/// let expected = _mm256_setr_epi8(
3301	/// `0`, `0`, `1`, `-1`, `2`, `-2`, `3`, `-3`, `4`, `-4`, `5`, `-5`, `6`, `-6`, `7`, `-7`, `16`, `-16`, `17`,
3302	/// `-17`, `18`, `-18`, `19`, `-19`, `20`, `-20`, `21`, `-21`, `22`, `-22`, `23`, `-23`,
3303	/// );
3304	/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !`0`);
3305	///
3306	/// # }
3307	/// # unsafe { worker(); }
3308	/// # }
3309	/// # }
3310	/// ```
3311	///
3312	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi8)
3313	#[inline]
3314	#[target_feature(enable = "avx2")]
3315	#[cfg_attr(test, assert_instr(vpunpcklbw))]
3316	#[stable(feature = "simd_x86", since = "1.27.0")]
3317	pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
3318	#[rustfmt::skip]
3319	let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3320	`0`, `32`, `1`, `33`, `2`, `34`, `3`, `35`,
3321	`4`, `36`, `5`, `37`, `6`, `38`, `7`, `39`,
3322	`16`, `48`, `17`, `49`, `18`, `50`, `19`, `51`,
3323	`20`, `52`, `21`, `53`, `22`, `54`, `23`, `55`,
3324	]);
3325	transmute(src:r)
3326	}
3327
3328	/// Unpacks and interleave 16-bit integers from the high half of each
3329	/// 128-bit lane of `a` and `b`.
3330	///
3331	/// ```rust
3332	/// #[cfg(target_arch = "x86")]
3333	/// use std::arch::x86::*;
3334	/// #[cfg(target_arch = "x86_64")]
3335	/// use std::arch::x86_64::*;
3336	///
3337	/// # fn main() {
3338	/// # if is_x86_feature_detected!("avx2") {
3339	/// # #[target_feature(enable = "avx2")]
3340	/// # unsafe fn worker() {
3341	/// let a = _mm256_setr_epi16(
3342	/// `0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
3343	/// );
3344	/// let b = _mm256_setr_epi16(
3345	/// `0`, `-1`, `-2`, `-3`, `-4`, `-5`, `-6`, `-7`, `-8`, `-9`, `-10`, `-11`, `-12`, `-13`, `-14`, `-15`,
3346	/// );
3347	///
3348	/// let c = _mm256_unpackhi_epi16(a, b);
3349	///
3350	/// let expected = _mm256_setr_epi16(
3351	/// `4`, `-4`, `5`, `-5`, `6`, `-6`, `7`, `-7`, `12`, `-12`, `13`, `-13`, `14`, `-14`, `15`, `-15`,
3352	/// );
3353	/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !`0`);
3354	///
3355	/// # }
3356	/// # unsafe { worker(); }
3357	/// # }
3358	/// # }
3359	/// ```
3360	///
3361	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi16)
3362	#[inline]
3363	#[target_feature(enable = "avx2")]
3364	#[cfg_attr(test, assert_instr(vpunpckhwd))]
3365	#[stable(feature = "simd_x86", since = "1.27.0")]
3366	pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
3367	let r: i16x16 = simd_shuffle!(
3368	a.as_i16x16(),
3369	b.as_i16x16(),
3370	[`4`, `20`, `5`, `21`, `6`, `22`, `7`, `23`, `12`, `28`, `13`, `29`, `14`, `30`, `15`, `31`],
3371	);
3372	transmute(src:r)
3373	}
3374
3375	/// Unpacks and interleave 16-bit integers from the low half of each
3376	/// 128-bit lane of `a` and `b`.
3377	///
3378	/// ```rust
3379	/// #[cfg(target_arch = "x86")]
3380	/// use std::arch::x86::*;
3381	/// #[cfg(target_arch = "x86_64")]
3382	/// use std::arch::x86_64::*;
3383	///
3384	/// # fn main() {
3385	/// # if is_x86_feature_detected!("avx2") {
3386	/// # #[target_feature(enable = "avx2")]
3387	/// # unsafe fn worker() {
3388	///
3389	/// let a = _mm256_setr_epi16(
3390	/// `0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
3391	/// );
3392	/// let b = _mm256_setr_epi16(
3393	/// `0`, `-1`, `-2`, `-3`, `-4`, `-5`, `-6`, `-7`, `-8`, `-9`, `-10`, `-11`, `-12`, `-13`, `-14`, `-15`,
3394	/// );
3395	///
3396	/// let c = _mm256_unpacklo_epi16(a, b);
3397	///
3398	/// let expected = _mm256_setr_epi16(
3399	/// `0`, `0`, `1`, `-1`, `2`, `-2`, `3`, `-3`, `8`, `-8`, `9`, `-9`, `10`, `-10`, `11`, `-11`,
3400	/// );
3401	/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !`0`);
3402	///
3403	/// # }
3404	/// # unsafe { worker(); }
3405	/// # }
3406	/// # }
3407	/// ```
3408	///
3409	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi16)
3410	#[inline]
3411	#[target_feature(enable = "avx2")]
3412	#[cfg_attr(test, assert_instr(vpunpcklwd))]
3413	#[stable(feature = "simd_x86", since = "1.27.0")]
3414	pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
3415	let r: i16x16 = simd_shuffle!(
3416	a.as_i16x16(),
3417	b.as_i16x16(),
3418	[`0`, `16`, `1`, `17`, `2`, `18`, `3`, `19`, `8`, `24`, `9`, `25`, `10`, `26`, `11`, `27`],
3419	);
3420	transmute(src:r)
3421	}
3422
3423	/// Unpacks and interleave 32-bit integers from the high half of each
3424	/// 128-bit lane of `a` and `b`.
3425	///
3426	/// ```rust
3427	/// #[cfg(target_arch = "x86")]
3428	/// use std::arch::x86::*;
3429	/// #[cfg(target_arch = "x86_64")]
3430	/// use std::arch::x86_64::*;
3431	///
3432	/// # fn main() {
3433	/// # if is_x86_feature_detected!("avx2") {
3434	/// # #[target_feature(enable = "avx2")]
3435	/// # unsafe fn worker() {
3436	/// let a = _mm256_setr_epi32(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`);
3437	/// let b = _mm256_setr_epi32(`0`, `-1`, `-2`, `-3`, `-4`, `-5`, `-6`, `-7`);
3438	///
3439	/// let c = _mm256_unpackhi_epi32(a, b);
3440	///
3441	/// let expected = _mm256_setr_epi32(`2`, `-2`, `3`, `-3`, `6`, `-6`, `7`, `-7`);
3442	/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !`0`);
3443	///
3444	/// # }
3445	/// # unsafe { worker(); }
3446	/// # }
3447	/// # }
3448	/// ```
3449	///
3450	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi32)
3451	#[inline]
3452	#[target_feature(enable = "avx2")]
3453	#[cfg_attr(test, assert_instr(vunpckhps))]
3454	#[stable(feature = "simd_x86", since = "1.27.0")]
3455	pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
3456	let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [`2`, `10`, `3`, `11`, `6`, `14`, `7`, `15`]);
3457	transmute(src:r)
3458	}
3459
3460	/// Unpacks and interleave 32-bit integers from the low half of each
3461	/// 128-bit lane of `a` and `b`.
3462	///
3463	/// ```rust
3464	/// #[cfg(target_arch = "x86")]
3465	/// use std::arch::x86::*;
3466	/// #[cfg(target_arch = "x86_64")]
3467	/// use std::arch::x86_64::*;
3468	///
3469	/// # fn main() {
3470	/// # if is_x86_feature_detected!("avx2") {
3471	/// # #[target_feature(enable = "avx2")]
3472	/// # unsafe fn worker() {
3473	/// let a = _mm256_setr_epi32(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`);
3474	/// let b = _mm256_setr_epi32(`0`, `-1`, `-2`, `-3`, `-4`, `-5`, `-6`, `-7`);
3475	///
3476	/// let c = _mm256_unpacklo_epi32(a, b);
3477	///
3478	/// let expected = _mm256_setr_epi32(`0`, `0`, `1`, `-1`, `4`, `-4`, `5`, `-5`);
3479	/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !`0`);
3480	///
3481	/// # }
3482	/// # unsafe { worker(); }
3483	/// # }
3484	/// # }
3485	/// ```
3486	///
3487	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi32)
3488	#[inline]
3489	#[target_feature(enable = "avx2")]
3490	#[cfg_attr(test, assert_instr(vunpcklps))]
3491	#[stable(feature = "simd_x86", since = "1.27.0")]
3492	pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
3493	let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [`0`, `8`, `1`, `9`, `4`, `12`, `5`, `13`]);
3494	transmute(src:r)
3495	}
3496
3497	/// Unpacks and interleave 64-bit integers from the high half of each
3498	/// 128-bit lane of `a` and `b`.
3499	///
3500	/// ```rust
3501	/// #[cfg(target_arch = "x86")]
3502	/// use std::arch::x86::*;
3503	/// #[cfg(target_arch = "x86_64")]
3504	/// use std::arch::x86_64::*;
3505	///
3506	/// # fn main() {
3507	/// # if is_x86_feature_detected!("avx2") {
3508	/// # #[target_feature(enable = "avx2")]
3509	/// # unsafe fn worker() {
3510	/// let a = _mm256_setr_epi64x(`0`, `1`, `2`, `3`);
3511	/// let b = _mm256_setr_epi64x(`0`, `-1`, `-2`, `-3`);
3512	///
3513	/// let c = _mm256_unpackhi_epi64(a, b);
3514	///
3515	/// let expected = _mm256_setr_epi64x(`1`, `-1`, `3`, `-3`);
3516	/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !`0`);
3517	///
3518	/// # }
3519	/// # unsafe { worker(); }
3520	/// # }
3521	/// # }
3522	/// ```
3523	///
3524	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi64)
3525	#[inline]
3526	#[target_feature(enable = "avx2")]
3527	#[cfg_attr(test, assert_instr(vunpckhpd))]
3528	#[stable(feature = "simd_x86", since = "1.27.0")]
3529	pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
3530	let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [`1`, `5`, `3`, `7`]);
3531	transmute(src:r)
3532	}
3533
3534	/// Unpacks and interleave 64-bit integers from the low half of each
3535	/// 128-bit lane of `a` and `b`.
3536	///
3537	/// ```rust
3538	/// #[cfg(target_arch = "x86")]
3539	/// use std::arch::x86::*;
3540	/// #[cfg(target_arch = "x86_64")]
3541	/// use std::arch::x86_64::*;
3542	///
3543	/// # fn main() {
3544	/// # if is_x86_feature_detected!("avx2") {
3545	/// # #[target_feature(enable = "avx2")]
3546	/// # unsafe fn worker() {
3547	/// let a = _mm256_setr_epi64x(`0`, `1`, `2`, `3`);
3548	/// let b = _mm256_setr_epi64x(`0`, `-1`, `-2`, `-3`);
3549	///
3550	/// let c = _mm256_unpacklo_epi64(a, b);
3551	///
3552	/// let expected = _mm256_setr_epi64x(`0`, `0`, `2`, `-2`);
3553	/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !`0`);
3554	///
3555	/// # }
3556	/// # unsafe { worker(); }
3557	/// # }
3558	/// # }
3559	/// ```
3560	///
3561	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi64)
3562	#[inline]
3563	#[target_feature(enable = "avx2")]
3564	#[cfg_attr(test, assert_instr(vunpcklpd))]
3565	#[stable(feature = "simd_x86", since = "1.27.0")]
3566	pub unsafe fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
3567	let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [`0`, `4`, `2`, `6`]);
3568	transmute(src:r)
3569	}
3570
3571	/// Computes the bitwise XOR of 256 bits (representing integer data)
3572	/// in `a` and `b`
3573	///
3574	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_si256)
3575	#[inline]
3576	#[target_feature(enable = "avx2")]
3577	#[cfg_attr(test, assert_instr(vxorps))]
3578	#[stable(feature = "simd_x86", since = "1.27.0")]
3579	pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
3580	transmute(src:simd_xor(x:a.as_i64x4(), y:b.as_i64x4()))
3581	}
3582
3583	/// Extracts an 8-bit integer from `a`, selected with `INDEX`. Returns a 32-bit
3584	/// integer containing the zero-extended integer data.
3585	///
3586	/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
3587	///
3588	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi8)
3589	#[inline]
3590	#[target_feature(enable = "avx2")]
3591	// This intrinsic has no corresponding instruction.
3592	#[rustc_legacy_const_generics(`1`)]
3593	#[stable(feature = "simd_x86", since = "1.27.0")]
3594	pub unsafe fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
3595	static_assert_uimm_bits!(INDEX, `5`);
3596	simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32
3597	}
3598
3599	/// Extracts a 16-bit integer from `a`, selected with `INDEX`. Returns a 32-bit
3600	/// integer containing the zero-extended integer data.
3601	///
3602	/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
3603	///
3604	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi16)
3605	#[inline]
3606	#[target_feature(enable = "avx2")]
3607	// This intrinsic has no corresponding instruction.
3608	#[rustc_legacy_const_generics(`1`)]
3609	#[stable(feature = "simd_x86", since = "1.27.0")]
3610	pub unsafe fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
3611	static_assert_uimm_bits!(INDEX, `4`);
3612	simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32
3613	}
3614
3615	/// Extracts a 32-bit integer from `a`, selected with `INDEX`.
3616	///
3617	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi32)
3618	#[inline]
3619	#[target_feature(enable = "avx2")]
3620	// This intrinsic has no corresponding instruction.
3621	#[rustc_legacy_const_generics(`1`)]
3622	#[stable(feature = "simd_x86", since = "1.27.0")]
3623	pub unsafe fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 {
3624	static_assert_uimm_bits!(INDEX, `3`);
3625	simd_extract!(a.as_i32x8(), INDEX as u32)
3626	}
3627
3628	/// Returns the first element of the input vector of `[4 x double]`.
3629	///
3630	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsd_f64)
3631	#[inline]
3632	#[target_feature(enable = "avx2")]
3633	//#[cfg_attr(test, assert_instr(movsd))] FIXME
3634	#[stable(feature = "simd_x86", since = "1.27.0")]
3635	pub unsafe fn _mm256_cvtsd_f64(a: __m256d) -> f64 {
3636	simd_extract!(a, `0`)
3637	}
3638
3639	/// Returns the first element of the input vector of `[8 x i32]`.
3640	///
3641	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsi256_si32)
3642	#[inline]
3643	#[target_feature(enable = "avx2")]
3644	#[stable(feature = "simd_x86", since = "1.27.0")]
3645	pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
3646	simd_extract!(a.as_i32x8(), `0`)
3647	}
3648
3649	#[allow(improper_ctypes)]
3650	extern "C" {
3651	#[link_name = "llvm.x86.avx2.phadd.w"]
3652	fn phaddw(a: i16x16, b: i16x16) -> i16x16;
3653	#[link_name = "llvm.x86.avx2.phadd.d"]
3654	fn phaddd(a: i32x8, b: i32x8) -> i32x8;
3655	#[link_name = "llvm.x86.avx2.phadd.sw"]
3656	fn phaddsw(a: i16x16, b: i16x16) -> i16x16;
3657	#[link_name = "llvm.x86.avx2.phsub.w"]
3658	fn phsubw(a: i16x16, b: i16x16) -> i16x16;
3659	#[link_name = "llvm.x86.avx2.phsub.d"]
3660	fn phsubd(a: i32x8, b: i32x8) -> i32x8;
3661	#[link_name = "llvm.x86.avx2.phsub.sw"]
3662	fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
3663	#[link_name = "llvm.x86.avx2.pmadd.wd"]
3664	fn pmaddwd(a: i16x16, b: i16x16) -> i32x8;
3665	#[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
3666	fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16;
3667	#[link_name = "llvm.x86.avx2.maskload.d"]
3668	fn maskloadd(mem_addr: *const i8, mask: i32x4) -> i32x4;
3669	#[link_name = "llvm.x86.avx2.maskload.d.256"]
3670	fn maskloadd256(mem_addr: *const i8, mask: i32x8) -> i32x8;
3671	#[link_name = "llvm.x86.avx2.maskload.q"]
3672	fn maskloadq(mem_addr: *const i8, mask: i64x2) -> i64x2;
3673	#[link_name = "llvm.x86.avx2.maskload.q.256"]
3674	fn maskloadq256(mem_addr: *const i8, mask: i64x4) -> i64x4;
3675	#[link_name = "llvm.x86.avx2.maskstore.d"]
3676	fn maskstored(mem_addr: *mut i8, mask: i32x4, a: i32x4);
3677	#[link_name = "llvm.x86.avx2.maskstore.d.256"]
3678	fn maskstored256(mem_addr: *mut i8, mask: i32x8, a: i32x8);
3679	#[link_name = "llvm.x86.avx2.maskstore.q"]
3680	fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2);
3681	#[link_name = "llvm.x86.avx2.maskstore.q.256"]
3682	fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
3683	#[link_name = "llvm.x86.avx2.mpsadbw"]
3684	fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
3685	#[link_name = "llvm.x86.avx2.pmul.hr.sw"]
3686	fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
3687	#[link_name = "llvm.x86.avx2.packsswb"]
3688	fn packsswb(a: i16x16, b: i16x16) -> i8x32;
3689	#[link_name = "llvm.x86.avx2.packssdw"]
3690	fn packssdw(a: i32x8, b: i32x8) -> i16x16;
3691	#[link_name = "llvm.x86.avx2.packuswb"]
3692	fn packuswb(a: i16x16, b: i16x16) -> u8x32;
3693	#[link_name = "llvm.x86.avx2.packusdw"]
3694	fn packusdw(a: i32x8, b: i32x8) -> u16x16;
3695	#[link_name = "llvm.x86.avx2.psad.bw"]
3696	fn psadbw(a: u8x32, b: u8x32) -> u64x4;
3697	#[link_name = "llvm.x86.avx2.psign.b"]
3698	fn psignb(a: i8x32, b: i8x32) -> i8x32;
3699	#[link_name = "llvm.x86.avx2.psign.w"]
3700	fn psignw(a: i16x16, b: i16x16) -> i16x16;
3701	#[link_name = "llvm.x86.avx2.psign.d"]
3702	fn psignd(a: i32x8, b: i32x8) -> i32x8;
3703	#[link_name = "llvm.x86.avx2.psll.w"]
3704	fn psllw(a: i16x16, count: i16x8) -> i16x16;
3705	#[link_name = "llvm.x86.avx2.psll.d"]
3706	fn pslld(a: i32x8, count: i32x4) -> i32x8;
3707	#[link_name = "llvm.x86.avx2.psll.q"]
3708	fn psllq(a: i64x4, count: i64x2) -> i64x4;
3709	#[link_name = "llvm.x86.avx2.psllv.d"]
3710	fn psllvd(a: i32x4, count: i32x4) -> i32x4;
3711	#[link_name = "llvm.x86.avx2.psllv.d.256"]
3712	fn psllvd256(a: i32x8, count: i32x8) -> i32x8;
3713	#[link_name = "llvm.x86.avx2.psllv.q"]
3714	fn psllvq(a: i64x2, count: i64x2) -> i64x2;
3715	#[link_name = "llvm.x86.avx2.psllv.q.256"]
3716	fn psllvq256(a: i64x4, count: i64x4) -> i64x4;
3717	#[link_name = "llvm.x86.avx2.psra.w"]
3718	fn psraw(a: i16x16, count: i16x8) -> i16x16;
3719	#[link_name = "llvm.x86.avx2.psra.d"]
3720	fn psrad(a: i32x8, count: i32x4) -> i32x8;
3721	#[link_name = "llvm.x86.avx2.psrav.d"]
3722	fn psravd(a: i32x4, count: i32x4) -> i32x4;
3723	#[link_name = "llvm.x86.avx2.psrav.d.256"]
3724	fn psravd256(a: i32x8, count: i32x8) -> i32x8;
3725	#[link_name = "llvm.x86.avx2.psrl.w"]
3726	fn psrlw(a: i16x16, count: i16x8) -> i16x16;
3727	#[link_name = "llvm.x86.avx2.psrl.d"]
3728	fn psrld(a: i32x8, count: i32x4) -> i32x8;
3729	#[link_name = "llvm.x86.avx2.psrl.q"]
3730	fn psrlq(a: i64x4, count: i64x2) -> i64x4;
3731	#[link_name = "llvm.x86.avx2.psrlv.d"]
3732	fn psrlvd(a: i32x4, count: i32x4) -> i32x4;
3733	#[link_name = "llvm.x86.avx2.psrlv.d.256"]
3734	fn psrlvd256(a: i32x8, count: i32x8) -> i32x8;
3735	#[link_name = "llvm.x86.avx2.psrlv.q"]
3736	fn psrlvq(a: i64x2, count: i64x2) -> i64x2;
3737	#[link_name = "llvm.x86.avx2.psrlv.q.256"]
3738	fn psrlvq256(a: i64x4, count: i64x4) -> i64x4;
3739	#[link_name = "llvm.x86.avx2.pshuf.b"]
3740	fn pshufb(a: u8x32, b: u8x32) -> u8x32;
3741	#[link_name = "llvm.x86.avx2.permd"]
3742	fn permd(a: u32x8, b: u32x8) -> u32x8;
3743	#[link_name = "llvm.x86.avx2.permps"]
3744	fn permps(a: __m256, b: i32x8) -> __m256;
3745	#[link_name = "llvm.x86.avx2.vperm2i128"]
3746	fn vperm2i128(a: i64x4, b: i64x4, imm8: i8) -> i64x4;
3747	#[link_name = "llvm.x86.avx2.gather.d.d"]
3748	fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4;
3749	#[link_name = "llvm.x86.avx2.gather.d.d.256"]
3750	fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8;
3751	#[link_name = "llvm.x86.avx2.gather.d.q"]
3752	fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2;
3753	#[link_name = "llvm.x86.avx2.gather.d.q.256"]
3754	fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4;
3755	#[link_name = "llvm.x86.avx2.gather.q.d"]
3756	fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4;
3757	#[link_name = "llvm.x86.avx2.gather.q.d.256"]
3758	fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4;
3759	#[link_name = "llvm.x86.avx2.gather.q.q"]
3760	fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2;
3761	#[link_name = "llvm.x86.avx2.gather.q.q.256"]
3762	fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4;
3763	#[link_name = "llvm.x86.avx2.gather.d.pd"]
3764	fn pgatherdpd(
3765	src: __m128d,
3766	slice: *const i8,
3767	offsets: i32x4,
3768	mask: __m128d,
3769	scale: i8,
3770	) -> __m128d;
3771	#[link_name = "llvm.x86.avx2.gather.d.pd.256"]
3772	fn vpgatherdpd(
3773	src: __m256d,
3774	slice: *const i8,
3775	offsets: i32x4,
3776	mask: __m256d,
3777	scale: i8,
3778	) -> __m256d;
3779	#[link_name = "llvm.x86.avx2.gather.q.pd"]
3780	fn pgatherqpd(
3781	src: __m128d,
3782	slice: *const i8,
3783	offsets: i64x2,
3784	mask: __m128d,
3785	scale: i8,
3786	) -> __m128d;
3787	#[link_name = "llvm.x86.avx2.gather.q.pd.256"]
3788	fn vpgatherqpd(
3789	src: __m256d,
3790	slice: *const i8,
3791	offsets: i64x4,
3792	mask: __m256d,
3793	scale: i8,
3794	) -> __m256d;
3795	#[link_name = "llvm.x86.avx2.gather.d.ps"]
3796	fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8)
3797	-> __m128;
3798	#[link_name = "llvm.x86.avx2.gather.d.ps.256"]
3799	fn vpgatherdps(
3800	src: __m256,
3801	slice: *const i8,
3802	offsets: i32x8,
3803	mask: __m256,
3804	scale: i8,
3805	) -> __m256;
3806	#[link_name = "llvm.x86.avx2.gather.q.ps"]
3807	fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8)
3808	-> __m128;
3809	#[link_name = "llvm.x86.avx2.gather.q.ps.256"]
3810	fn vpgatherqps(
3811	src: __m128,
3812	slice: *const i8,
3813	offsets: i64x4,
3814	mask: __m128,
3815	scale: i8,
3816	) -> __m128;
3817	#[link_name = "llvm.x86.avx2.psll.dq"]
3818	fn vpslldq(a: i64x4, b: i32) -> i64x4;
3819	#[link_name = "llvm.x86.avx2.psrl.dq"]
3820	fn vpsrldq(a: i64x4, b: i32) -> i64x4;
3821	}
3822
3823	#[cfg(test)]
3824	mod tests {
3825
3826	use stdarch_test::simd_test;
3827
3828	use crate::core_arch::x86::*;
3829
3830	#[simd_test(enable = "avx2")]
3831	unsafe fn test_mm256_abs_epi32() {
3832	#[rustfmt::skip]
3833	let a = _mm256_setr_epi32(
3834	`0`, `1`, `-1`, i32::MAX,
3835	i32::MIN, `100`, `-100`, `-32`,
3836	);
3837	let r = _mm256_abs_epi32(a);
3838	#[rustfmt::skip]
3839	let e = _mm256_setr_epi32(
3840	`0`, `1`, `1`, i32::MAX,
3841	i32::MAX.wrapping_add(`1`), `100`, `100`, `32`,
3842	);
3843	assert_eq_m256i(r, e);
3844	}
3845
3846	#[simd_test(enable = "avx2")]
3847	unsafe fn test_mm256_abs_epi16() {
3848	#[rustfmt::skip]
3849	let a = _mm256_setr_epi16(
3850	`0`, `1`, `-1`, `2`, `-2`, `3`, `-3`, `4`,
3851	`-4`, `5`, `-5`, i16::MAX, i16::MIN, `100`, `-100`, `-32`,
3852	);
3853	let r = _mm256_abs_epi16(a);
3854	#[rustfmt::skip]
3855	let e = _mm256_setr_epi16(
3856	`0`, `1`, `1`, `2`, `2`, `3`, `3`, `4`,
3857	`4`, `5`, `5`, i16::MAX, i16::MAX.wrapping_add(`1`), `100`, `100`, `32`,
3858	);
3859	assert_eq_m256i(r, e);
3860	}
3861
3862	#[simd_test(enable = "avx2")]
3863	unsafe fn test_mm256_abs_epi8() {
3864	#[rustfmt::skip]
3865	let a = _mm256_setr_epi8(
3866	`0`, `1`, `-1`, `2`, `-2`, `3`, `-3`, `4`,
3867	`-4`, `5`, `-5`, i8::MAX, i8::MIN, `100`, `-100`, `-32`,
3868	`0`, `1`, `-1`, `2`, `-2`, `3`, `-3`, `4`,
3869	`-4`, `5`, `-5`, i8::MAX, i8::MIN, `100`, `-100`, `-32`,
3870	);
3871	let r = _mm256_abs_epi8(a);
3872	#[rustfmt::skip]
3873	let e = _mm256_setr_epi8(
3874	`0`, `1`, `1`, `2`, `2`, `3`, `3`, `4`,
3875	`4`, `5`, `5`, i8::MAX, i8::MAX.wrapping_add(`1`), `100`, `100`, `32`,
3876	`0`, `1`, `1`, `2`, `2`, `3`, `3`, `4`,
3877	`4`, `5`, `5`, i8::MAX, i8::MAX.wrapping_add(`1`), `100`, `100`, `32`,
3878	);
3879	assert_eq_m256i(r, e);
3880	}
3881
3882	#[simd_test(enable = "avx2")]
3883	unsafe fn test_mm256_add_epi64() {
3884	let a = _mm256_setr_epi64x(`-10`, `0`, `100`, `1_000_000_000`);
3885	let b = _mm256_setr_epi64x(`-1`, `0`, `1`, `2`);
3886	let r = _mm256_add_epi64(a, b);
3887	let e = _mm256_setr_epi64x(`-11`, `0`, `101`, `1_000_000_002`);
3888	assert_eq_m256i(r, e);
3889	}
3890
3891	#[simd_test(enable = "avx2")]
3892	unsafe fn test_mm256_add_epi32() {
3893	let a = _mm256_setr_epi32(`-1`, `0`, `1`, `2`, `3`, `4`, `5`, `6`);
3894	let b = _mm256_setr_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
3895	let r = _mm256_add_epi32(a, b);
3896	let e = _mm256_setr_epi32(`0`, `2`, `4`, `6`, `8`, `10`, `12`, `14`);
3897	assert_eq_m256i(r, e);
3898	}
3899
3900	#[simd_test(enable = "avx2")]
3901	unsafe fn test_mm256_add_epi16() {
3902	#[rustfmt::skip]
3903	let a = _mm256_setr_epi16(
3904	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
3905	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
3906	);
3907	#[rustfmt::skip]
3908	let b = _mm256_setr_epi16(
3909	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
3910	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
3911	);
3912	let r = _mm256_add_epi16(a, b);
3913	#[rustfmt::skip]
3914	let e = _mm256_setr_epi16(
3915	`0`, `2`, `4`, `6`, `8`, `10`, `12`, `14`,
3916	`16`, `18`, `20`, `22`, `24`, `26`, `28`, `30`,
3917	);
3918	assert_eq_m256i(r, e);
3919	}
3920
3921	#[simd_test(enable = "avx2")]
3922	unsafe fn test_mm256_add_epi8() {
3923	#[rustfmt::skip]
3924	let a = _mm256_setr_epi8(
3925	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
3926	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
3927	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`,
3928	`24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
3929	);
3930	#[rustfmt::skip]
3931	let b = _mm256_setr_epi8(
3932	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
3933	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
3934	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`,
3935	`24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
3936	);
3937	let r = _mm256_add_epi8(a, b);
3938	#[rustfmt::skip]
3939	let e = _mm256_setr_epi8(
3940	`0`, `2`, `4`, `6`, `8`, `10`, `12`, `14`,
3941	`16`, `18`, `20`, `22`, `24`, `26`, `28`, `30`,
3942	`32`, `34`, `36`, `38`, `40`, `42`, `44`, `46`,
3943	`48`, `50`, `52`, `54`, `56`, `58`, `60`, `62`,
3944	);
3945	assert_eq_m256i(r, e);
3946	}
3947
3948	#[simd_test(enable = "avx2")]
3949	unsafe fn test_mm256_adds_epi8() {
3950	#[rustfmt::skip]
3951	let a = _mm256_setr_epi8(
3952	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
3953	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
3954	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`,
3955	`24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
3956	);
3957	#[rustfmt::skip]
3958	let b = _mm256_setr_epi8(
3959	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`,
3960	`40`, `41`, `42`, `43`, `44`, `45`, `46`, `47`,
3961	`48`, `49`, `50`, `51`, `52`, `53`, `54`, `55`,
3962	`56`, `57`, `58`, `59`, `60`, `61`, `62`, `63`,
3963	);
3964	let r = _mm256_adds_epi8(a, b);
3965	#[rustfmt::skip]
3966	let e = _mm256_setr_epi8(
3967	`32`, `34`, `36`, `38`, `40`, `42`, `44`, `46`,
3968	`48`, `50`, `52`, `54`, `56`, `58`, `60`, `62`,
3969	`64`, `66`, `68`, `70`, `72`, `74`, `76`, `78`,
3970	`80`, `82`, `84`, `86`, `88`, `90`, `92`, `94`,
3971	);
3972	assert_eq_m256i(r, e);
3973	}
3974
3975	#[simd_test(enable = "avx2")]
3976	unsafe fn test_mm256_adds_epi8_saturate_positive() {
3977	let a = _mm256_set1_epi8(`0x7F`);
3978	let b = _mm256_set1_epi8(`1`);
3979	let r = _mm256_adds_epi8(a, b);
3980	assert_eq_m256i(r, a);
3981	}
3982
3983	#[simd_test(enable = "avx2")]
3984	unsafe fn test_mm256_adds_epi8_saturate_negative() {
3985	let a = _mm256_set1_epi8(`-0x80`);
3986	let b = _mm256_set1_epi8(`-1`);
3987	let r = _mm256_adds_epi8(a, b);
3988	assert_eq_m256i(r, a);
3989	}
3990
3991	#[simd_test(enable = "avx2")]
3992	unsafe fn test_mm256_adds_epi16() {
3993	#[rustfmt::skip]
3994	let a = _mm256_setr_epi16(
3995	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
3996	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
3997	);
3998	#[rustfmt::skip]
3999	let b = _mm256_setr_epi16(
4000	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`,
4001	`40`, `41`, `42`, `43`, `44`, `45`, `46`, `47`,
4002	);
4003	let r = _mm256_adds_epi16(a, b);
4004	#[rustfmt::skip]
4005	let e = _mm256_setr_epi16(
4006	`32`, `34`, `36`, `38`, `40`, `42`, `44`, `46`,
4007	`48`, `50`, `52`, `54`, `56`, `58`, `60`, `62`,
4008	);
4009
4010	assert_eq_m256i(r, e);
4011	}
4012
4013	#[simd_test(enable = "avx2")]
4014	unsafe fn test_mm256_adds_epi16_saturate_positive() {
4015	let a = _mm256_set1_epi16(`0x7FFF`);
4016	let b = _mm256_set1_epi16(`1`);
4017	let r = _mm256_adds_epi16(a, b);
4018	assert_eq_m256i(r, a);
4019	}
4020
4021	#[simd_test(enable = "avx2")]
4022	unsafe fn test_mm256_adds_epi16_saturate_negative() {
4023	let a = _mm256_set1_epi16(`-0x8000`);
4024	let b = _mm256_set1_epi16(`-1`);
4025	let r = _mm256_adds_epi16(a, b);
4026	assert_eq_m256i(r, a);
4027	}
4028
4029	#[simd_test(enable = "avx2")]
4030	unsafe fn test_mm256_adds_epu8() {
4031	#[rustfmt::skip]
4032	let a = _mm256_setr_epi8(
4033	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
4034	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
4035	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`,
4036	`24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
4037	);
4038	#[rustfmt::skip]
4039	let b = _mm256_setr_epi8(
4040	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`,
4041	`40`, `41`, `42`, `43`, `44`, `45`, `46`, `47`,
4042	`48`, `49`, `50`, `51`, `52`, `53`, `54`, `55`,
4043	`56`, `57`, `58`, `59`, `60`, `61`, `62`, `63`,
4044	);
4045	let r = _mm256_adds_epu8(a, b);
4046	#[rustfmt::skip]
4047	let e = _mm256_setr_epi8(
4048	`32`, `34`, `36`, `38`, `40`, `42`, `44`, `46`,
4049	`48`, `50`, `52`, `54`, `56`, `58`, `60`, `62`,
4050	`64`, `66`, `68`, `70`, `72`, `74`, `76`, `78`,
4051	`80`, `82`, `84`, `86`, `88`, `90`, `92`, `94`,
4052	);
4053	assert_eq_m256i(r, e);
4054	}
4055
4056	#[simd_test(enable = "avx2")]
4057	unsafe fn test_mm256_adds_epu8_saturate() {
4058	let a = _mm256_set1_epi8(!`0`);
4059	let b = _mm256_set1_epi8(`1`);
4060	let r = _mm256_adds_epu8(a, b);
4061	assert_eq_m256i(r, a);
4062	}
4063
4064	#[simd_test(enable = "avx2")]
4065	unsafe fn test_mm256_adds_epu16() {
4066	#[rustfmt::skip]
4067	let a = _mm256_setr_epi16(
4068	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
4069	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
4070	);
4071	#[rustfmt::skip]
4072	let b = _mm256_setr_epi16(
4073	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`,
4074	`40`, `41`, `42`, `43`, `44`, `45`, `46`, `47`,
4075	);
4076	let r = _mm256_adds_epu16(a, b);
4077	#[rustfmt::skip]
4078	let e = _mm256_setr_epi16(
4079	`32`, `34`, `36`, `38`, `40`, `42`, `44`, `46`,
4080	`48`, `50`, `52`, `54`, `56`, `58`, `60`, `62`,
4081	);
4082
4083	assert_eq_m256i(r, e);
4084	}
4085
4086	#[simd_test(enable = "avx2")]
4087	unsafe fn test_mm256_adds_epu16_saturate() {
4088	let a = _mm256_set1_epi16(!`0`);
4089	let b = _mm256_set1_epi16(`1`);
4090	let r = _mm256_adds_epu16(a, b);
4091	assert_eq_m256i(r, a);
4092	}
4093
4094	#[simd_test(enable = "avx2")]
4095	unsafe fn test_mm256_and_si256() {
4096	let a = _mm256_set1_epi8(`5`);
4097	let b = _mm256_set1_epi8(`3`);
4098	let got = _mm256_and_si256(a, b);
4099	assert_eq_m256i(got, _mm256_set1_epi8(`1`));
4100	}
4101
4102	#[simd_test(enable = "avx2")]
4103	unsafe fn test_mm256_andnot_si256() {
4104	let a = _mm256_set1_epi8(`5`);
4105	let b = _mm256_set1_epi8(`3`);
4106	let got = _mm256_andnot_si256(a, b);
4107	assert_eq_m256i(got, _mm256_set1_epi8(`2`));
4108	}
4109
4110	#[simd_test(enable = "avx2")]
4111	unsafe fn test_mm256_avg_epu8() {
4112	let (a, b) = (_mm256_set1_epi8(`3`), _mm256_set1_epi8(`9`));
4113	let r = _mm256_avg_epu8(a, b);
4114	assert_eq_m256i(r, _mm256_set1_epi8(`6`));
4115	}
4116
4117	#[simd_test(enable = "avx2")]
4118	unsafe fn test_mm256_avg_epu16() {
4119	let (a, b) = (_mm256_set1_epi16(`3`), _mm256_set1_epi16(`9`));
4120	let r = _mm256_avg_epu16(a, b);
4121	assert_eq_m256i(r, _mm256_set1_epi16(`6`));
4122	}
4123
4124	#[simd_test(enable = "avx2")]
4125	unsafe fn test_mm_blend_epi32() {
4126	let (a, b) = (_mm_set1_epi32(`3`), _mm_set1_epi32(`9`));
4127	let e = _mm_setr_epi32(`9`, `3`, `3`, `3`);
4128	let r = _mm_blend_epi32::<`0x01`>(a, b);
4129	assert_eq_m128i(r, e);
4130
4131	let r = _mm_blend_epi32::<`0x0E`>(b, a);
4132	assert_eq_m128i(r, e);
4133	}
4134
4135	#[simd_test(enable = "avx2")]
4136	unsafe fn test_mm256_blend_epi32() {
4137	let (a, b) = (_mm256_set1_epi32(`3`), _mm256_set1_epi32(`9`));
4138	let e = _mm256_setr_epi32(`9`, `3`, `3`, `3`, `3`, `3`, `3`, `3`);
4139	let r = _mm256_blend_epi32::<`0x01`>(a, b);
4140	assert_eq_m256i(r, e);
4141
4142	let e = _mm256_setr_epi32(`3`, `9`, `3`, `3`, `3`, `3`, `3`, `9`);
4143	let r = _mm256_blend_epi32::<`0x82`>(a, b);
4144	assert_eq_m256i(r, e);
4145
4146	let e = _mm256_setr_epi32(`3`, `3`, `9`, `9`, `9`, `9`, `9`, `3`);
4147	let r = _mm256_blend_epi32::<`0x7C`>(a, b);
4148	assert_eq_m256i(r, e);
4149	}
4150
4151	#[simd_test(enable = "avx2")]
4152	unsafe fn test_mm256_blend_epi16() {
4153	let (a, b) = (_mm256_set1_epi16(`3`), _mm256_set1_epi16(`9`));
4154	let e = _mm256_setr_epi16(`9`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `9`, `3`, `3`, `3`, `3`, `3`, `3`, `3`);
4155	let r = _mm256_blend_epi16::<`0x01`>(a, b);
4156	assert_eq_m256i(r, e);
4157
4158	let r = _mm256_blend_epi16::<`0xFE`>(b, a);
4159	assert_eq_m256i(r, e);
4160	}
4161
4162	#[simd_test(enable = "avx2")]
4163	unsafe fn test_mm256_blendv_epi8() {
4164	let (a, b) = (_mm256_set1_epi8(`4`), _mm256_set1_epi8(`2`));
4165	let mask = _mm256_insert_epi8::<`2`>(_mm256_set1_epi8(`0`), `-1`);
4166	let e = _mm256_insert_epi8::<`2`>(_mm256_set1_epi8(`4`), `2`);
4167	let r = _mm256_blendv_epi8(a, b, mask);
4168	assert_eq_m256i(r, e);
4169	}
4170
4171	#[simd_test(enable = "avx2")]
4172	unsafe fn test_mm_broadcastb_epi8() {
4173	let a = _mm_insert_epi8::<`0`>(_mm_set1_epi8(`0x00`), `0x2a`);
4174	let res = _mm_broadcastb_epi8(a);
4175	assert_eq_m128i(res, _mm_set1_epi8(`0x2a`));
4176	}
4177
4178	#[simd_test(enable = "avx2")]
4179	unsafe fn test_mm256_broadcastb_epi8() {
4180	let a = _mm_insert_epi8::<`0`>(_mm_set1_epi8(`0x00`), `0x2a`);
4181	let res = _mm256_broadcastb_epi8(a);
4182	assert_eq_m256i(res, _mm256_set1_epi8(`0x2a`));
4183	}
4184
4185	#[simd_test(enable = "avx2")]
4186	unsafe fn test_mm_broadcastd_epi32() {
4187	let a = _mm_setr_epi32(`0x2a`, `0x8000000`, `0`, `0`);
4188	let res = _mm_broadcastd_epi32(a);
4189	assert_eq_m128i(res, _mm_set1_epi32(`0x2a`));
4190	}
4191
4192	#[simd_test(enable = "avx2")]
4193	unsafe fn test_mm256_broadcastd_epi32() {
4194	let a = _mm_setr_epi32(`0x2a`, `0x8000000`, `0`, `0`);
4195	let res = _mm256_broadcastd_epi32(a);
4196	assert_eq_m256i(res, _mm256_set1_epi32(`0x2a`));
4197	}
4198
4199	#[simd_test(enable = "avx2")]
4200	unsafe fn test_mm_broadcastq_epi64() {
4201	let a = _mm_setr_epi64x(`0x1ffffffff`, `0`);
4202	let res = _mm_broadcastq_epi64(a);
4203	assert_eq_m128i(res, _mm_set1_epi64x(`0x1ffffffff`));
4204	}
4205
4206	#[simd_test(enable = "avx2")]
4207	unsafe fn test_mm256_broadcastq_epi64() {
4208	let a = _mm_setr_epi64x(`0x1ffffffff`, `0`);
4209	let res = _mm256_broadcastq_epi64(a);
4210	assert_eq_m256i(res, _mm256_set1_epi64x(`0x1ffffffff`));
4211	}
4212
4213	#[simd_test(enable = "avx2")]
4214	unsafe fn test_mm_broadcastsd_pd() {
4215	let a = _mm_setr_pd(`6.88`, `3.44`);
4216	let res = _mm_broadcastsd_pd(a);
4217	assert_eq_m128d(res, _mm_set1_pd(`6.88`));
4218	}
4219
4220	#[simd_test(enable = "avx2")]
4221	unsafe fn test_mm256_broadcastsd_pd() {
4222	let a = _mm_setr_pd(`6.88`, `3.44`);
4223	let res = _mm256_broadcastsd_pd(a);
4224	assert_eq_m256d(res, _mm256_set1_pd(`6.88f64`));
4225	}
4226
4227	#[simd_test(enable = "avx2")]
4228	unsafe fn test_mm256_broadcastsi128_si256() {
4229	let a = _mm_setr_epi64x(`0x0987654321012334`, `0x5678909876543210`);
4230	let res = _mm256_broadcastsi128_si256(a);
4231	let retval = _mm256_setr_epi64x(
4232	`0x0987654321012334`,
4233	`0x5678909876543210`,
4234	`0x0987654321012334`,
4235	`0x5678909876543210`,
4236	);
4237	assert_eq_m256i(res, retval);
4238	}
4239
4240	#[simd_test(enable = "avx2")]
4241	unsafe fn test_mm_broadcastss_ps() {
4242	let a = _mm_setr_ps(`6.88`, `3.44`, `0.0`, `0.0`);
4243	let res = _mm_broadcastss_ps(a);
4244	assert_eq_m128(res, _mm_set1_ps(`6.88`));
4245	}
4246
4247	#[simd_test(enable = "avx2")]
4248	unsafe fn test_mm256_broadcastss_ps() {
4249	let a = _mm_setr_ps(`6.88`, `3.44`, `0.0`, `0.0`);
4250	let res = _mm256_broadcastss_ps(a);
4251	assert_eq_m256(res, _mm256_set1_ps(`6.88`));
4252	}
4253
4254	#[simd_test(enable = "avx2")]
4255	unsafe fn test_mm_broadcastw_epi16() {
4256	let a = _mm_insert_epi16::<`0`>(_mm_set1_epi16(`0x2a`), `0x22b`);
4257	let res = _mm_broadcastw_epi16(a);
4258	assert_eq_m128i(res, _mm_set1_epi16(`0x22b`));
4259	}
4260
4261	#[simd_test(enable = "avx2")]
4262	unsafe fn test_mm256_broadcastw_epi16() {
4263	let a = _mm_insert_epi16::<`0`>(_mm_set1_epi16(`0x2a`), `0x22b`);
4264	let res = _mm256_broadcastw_epi16(a);
4265	assert_eq_m256i(res, _mm256_set1_epi16(`0x22b`));
4266	}
4267
4268	#[simd_test(enable = "avx2")]
4269	unsafe fn test_mm256_cmpeq_epi8() {
4270	#[rustfmt::skip]
4271	let a = _mm256_setr_epi8(
4272	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
4273	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
4274	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`,
4275	`24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
4276	);
4277	#[rustfmt::skip]
4278	let b = _mm256_setr_epi8(
4279	`31`, `30`, `2`, `28`, `27`, `26`, `25`, `24`,
4280	`23`, `22`, `21`, `20`, `19`, `18`, `17`, `16`,
4281	`15`, `14`, `13`, `12`, `11`, `10`, `9`, `8`,
4282	`7`, `6`, `5`, `4`, `3`, `2`, `1`, `0`,
4283	);
4284	let r = _mm256_cmpeq_epi8(a, b);
4285	assert_eq_m256i(r, _mm256_insert_epi8::<`2`>(_mm256_set1_epi8(`0`), !`0`));
4286	}
4287
4288	#[simd_test(enable = "avx2")]
4289	unsafe fn test_mm256_cmpeq_epi16() {
4290	#[rustfmt::skip]
4291	let a = _mm256_setr_epi16(
4292	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
4293	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
4294	);
4295	#[rustfmt::skip]
4296	let b = _mm256_setr_epi16(
4297	`15`, `14`, `2`, `12`, `11`, `10`, `9`, `8`,
4298	`7`, `6`, `5`, `4`, `3`, `2`, `1`, `0`,
4299	);
4300	let r = _mm256_cmpeq_epi16(a, b);
4301	assert_eq_m256i(r, _mm256_insert_epi16::<`2`>(_mm256_set1_epi16(`0`), !`0`));
4302	}
4303
4304	#[simd_test(enable = "avx2")]
4305	unsafe fn test_mm256_cmpeq_epi32() {
4306	let a = _mm256_setr_epi32(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`);
4307	let b = _mm256_setr_epi32(`7`, `6`, `2`, `4`, `3`, `2`, `1`, `0`);
4308	let r = _mm256_cmpeq_epi32(a, b);
4309	let e = _mm256_set1_epi32(`0`);
4310	let e = _mm256_insert_epi32::<`2`>(e, !`0`);
4311	assert_eq_m256i(r, e);
4312	}
4313
4314	#[simd_test(enable = "avx2")]
4315	unsafe fn test_mm256_cmpeq_epi64() {
4316	let a = _mm256_setr_epi64x(`0`, `1`, `2`, `3`);
4317	let b = _mm256_setr_epi64x(`3`, `2`, `2`, `0`);
4318	let r = _mm256_cmpeq_epi64(a, b);
4319	assert_eq_m256i(r, _mm256_insert_epi64::<`2`>(_mm256_set1_epi64x(`0`), !`0`));
4320	}
4321
4322	#[simd_test(enable = "avx2")]
4323	unsafe fn test_mm256_cmpgt_epi8() {
4324	let a = _mm256_insert_epi8::<`0`>(_mm256_set1_epi8(`0`), `5`);
4325	let b = _mm256_set1_epi8(`0`);
4326	let r = _mm256_cmpgt_epi8(a, b);
4327	assert_eq_m256i(r, _mm256_insert_epi8::<`0`>(_mm256_set1_epi8(`0`), !`0`));
4328	}
4329
4330	#[simd_test(enable = "avx2")]
4331	unsafe fn test_mm256_cmpgt_epi16() {
4332	let a = _mm256_insert_epi16::<`0`>(_mm256_set1_epi16(`0`), `5`);
4333	let b = _mm256_set1_epi16(`0`);
4334	let r = _mm256_cmpgt_epi16(a, b);
4335	assert_eq_m256i(r, _mm256_insert_epi16::<`0`>(_mm256_set1_epi16(`0`), !`0`));
4336	}
4337
4338	#[simd_test(enable = "avx2")]
4339	unsafe fn test_mm256_cmpgt_epi32() {
4340	let a = _mm256_insert_epi32::<`0`>(_mm256_set1_epi32(`0`), `5`);
4341	let b = _mm256_set1_epi32(`0`);
4342	let r = _mm256_cmpgt_epi32(a, b);
4343	assert_eq_m256i(r, _mm256_insert_epi32::<`0`>(_mm256_set1_epi32(`0`), !`0`));
4344	}
4345
4346	#[simd_test(enable = "avx2")]
4347	unsafe fn test_mm256_cmpgt_epi64() {
4348	let a = _mm256_insert_epi64::<`0`>(_mm256_set1_epi64x(`0`), `5`);
4349	let b = _mm256_set1_epi64x(`0`);
4350	let r = _mm256_cmpgt_epi64(a, b);
4351	assert_eq_m256i(r, _mm256_insert_epi64::<`0`>(_mm256_set1_epi64x(`0`), !`0`));
4352	}
4353
4354	#[simd_test(enable = "avx2")]
4355	unsafe fn test_mm256_cvtepi8_epi16() {
4356	#[rustfmt::skip]
4357	let a = _mm_setr_epi8(
4358	`0`, `0`, `-1`, `1`, `-2`, `2`, `-3`, `3`,
4359	`-4`, `4`, `-5`, `5`, `-6`, `6`, `-7`, `7`,
4360	);
4361	#[rustfmt::skip]
4362	let r = _mm256_setr_epi16(
4363	`0`, `0`, `-1`, `1`, `-2`, `2`, `-3`, `3`,
4364	`-4`, `4`, `-5`, `5`, `-6`, `6`, `-7`, `7`,
4365	);
4366	assert_eq_m256i(r, _mm256_cvtepi8_epi16(a));
4367	}
4368
4369	#[simd_test(enable = "avx2")]
4370	unsafe fn test_mm256_cvtepi8_epi32() {
4371	#[rustfmt::skip]
4372	let a = _mm_setr_epi8(
4373	`0`, `0`, `-1`, `1`, `-2`, `2`, `-3`, `3`,
4374	`-4`, `4`, `-5`, `5`, `-6`, `6`, `-7`, `7`,
4375	);
4376	let r = _mm256_setr_epi32(`0`, `0`, `-1`, `1`, `-2`, `2`, `-3`, `3`);
4377	assert_eq_m256i(r, _mm256_cvtepi8_epi32(a));
4378	}
4379
4380	#[simd_test(enable = "avx2")]
4381	unsafe fn test_mm256_cvtepi8_epi64() {
4382	#[rustfmt::skip]
4383	let a = _mm_setr_epi8(
4384	`0`, `0`, `-1`, `1`, `-2`, `2`, `-3`, `3`,
4385	`-4`, `4`, `-5`, `5`, `-6`, `6`, `-7`, `7`,
4386	);
4387	let r = _mm256_setr_epi64x(`0`, `0`, `-1`, `1`);
4388	assert_eq_m256i(r, _mm256_cvtepi8_epi64(a));
4389	}
4390
4391	#[simd_test(enable = "avx2")]
4392	unsafe fn test_mm256_cvtepi16_epi32() {
4393	let a = _mm_setr_epi16(`0`, `0`, `-1`, `1`, `-2`, `2`, `-3`, `3`);
4394	let r = _mm256_setr_epi32(`0`, `0`, `-1`, `1`, `-2`, `2`, `-3`, `3`);
4395	assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
4396	}
4397
4398	#[simd_test(enable = "avx2")]
4399	unsafe fn test_mm256_cvtepi16_epi64() {
4400	let a = _mm_setr_epi16(`0`, `0`, `-1`, `1`, `-2`, `2`, `-3`, `3`);
4401	let r = _mm256_setr_epi64x(`0`, `0`, `-1`, `1`);
4402	assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
4403	}
4404
4405	#[simd_test(enable = "avx2")]
4406	unsafe fn test_mm256_cvtepi32_epi64() {
4407	let a = _mm_setr_epi32(`0`, `0`, `-1`, `1`);
4408	let r = _mm256_setr_epi64x(`0`, `0`, `-1`, `1`);
4409	assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
4410	}
4411
4412	#[simd_test(enable = "avx2")]
4413	unsafe fn test_mm256_cvtepu16_epi32() {
4414	let a = _mm_setr_epi16(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`);
4415	let r = _mm256_setr_epi32(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`);
4416	assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
4417	}
4418
4419	#[simd_test(enable = "avx2")]
4420	unsafe fn test_mm256_cvtepu16_epi64() {
4421	let a = _mm_setr_epi16(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`);
4422	let r = _mm256_setr_epi64x(`0`, `1`, `2`, `3`);
4423	assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
4424	}
4425
4426	#[simd_test(enable = "avx2")]
4427	unsafe fn test_mm256_cvtepu32_epi64() {
4428	let a = _mm_setr_epi32(`0`, `1`, `2`, `3`);
4429	let r = _mm256_setr_epi64x(`0`, `1`, `2`, `3`);
4430	assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
4431	}
4432
4433	#[simd_test(enable = "avx2")]
4434	unsafe fn test_mm256_cvtepu8_epi16() {
4435	#[rustfmt::skip]
4436	let a = _mm_setr_epi8(
4437	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
4438	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
4439	);
4440	#[rustfmt::skip]
4441	let r = _mm256_setr_epi16(
4442	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
4443	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
4444	);
4445	assert_eq_m256i(r, _mm256_cvtepu8_epi16(a));
4446	}
4447
4448	#[simd_test(enable = "avx2")]
4449	unsafe fn test_mm256_cvtepu8_epi32() {
4450	#[rustfmt::skip]
4451	let a = _mm_setr_epi8(
4452	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
4453	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
4454	);
4455	let r = _mm256_setr_epi32(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`);
4456	assert_eq_m256i(r, _mm256_cvtepu8_epi32(a));
4457	}
4458
4459	#[simd_test(enable = "avx2")]
4460	unsafe fn test_mm256_cvtepu8_epi64() {
4461	#[rustfmt::skip]
4462	let a = _mm_setr_epi8(
4463	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
4464	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
4465	);
4466	let r = _mm256_setr_epi64x(`0`, `1`, `2`, `3`);
4467	assert_eq_m256i(r, _mm256_cvtepu8_epi64(a));
4468	}
4469
4470	#[simd_test(enable = "avx2")]
4471	unsafe fn test_mm256_extracti128_si256() {
4472	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
4473	let r = _mm256_extracti128_si256::<`1`>(a);
4474	let e = _mm_setr_epi64x(`3`, `4`);
4475	assert_eq_m128i(r, e);
4476	}
4477
4478	#[simd_test(enable = "avx2")]
4479	unsafe fn test_mm256_hadd_epi16() {
4480	let a = _mm256_set1_epi16(`2`);
4481	let b = _mm256_set1_epi16(`4`);
4482	let r = _mm256_hadd_epi16(a, b);
4483	let e = _mm256_setr_epi16(`4`, `4`, `4`, `4`, `8`, `8`, `8`, `8`, `4`, `4`, `4`, `4`, `8`, `8`, `8`, `8`);
4484	assert_eq_m256i(r, e);
4485	}
4486
4487	#[simd_test(enable = "avx2")]
4488	unsafe fn test_mm256_hadd_epi32() {
4489	let a = _mm256_set1_epi32(`2`);
4490	let b = _mm256_set1_epi32(`4`);
4491	let r = _mm256_hadd_epi32(a, b);
4492	let e = _mm256_setr_epi32(`4`, `4`, `8`, `8`, `4`, `4`, `8`, `8`);
4493	assert_eq_m256i(r, e);
4494	}
4495
4496	#[simd_test(enable = "avx2")]
4497	unsafe fn test_mm256_hadds_epi16() {
4498	let a = _mm256_set1_epi16(`2`);
4499	let a = _mm256_insert_epi16::<`0`>(a, `0x7fff`);
4500	let a = _mm256_insert_epi16::<`1`>(a, `1`);
4501	let b = _mm256_set1_epi16(`4`);
4502	let r = _mm256_hadds_epi16(a, b);
4503	#[rustfmt::skip]
4504	let e = _mm256_setr_epi16(
4505	`0x7FFF`, `4`, `4`, `4`, `8`, `8`, `8`, `8`,
4506	`4`, `4`, `4`, `4`, `8`, `8`, `8`, `8`,
4507	);
4508	assert_eq_m256i(r, e);
4509	}
4510
4511	#[simd_test(enable = "avx2")]
4512	unsafe fn test_mm256_hsub_epi16() {
4513	let a = _mm256_set1_epi16(`2`);
4514	let b = _mm256_set1_epi16(`4`);
4515	let r = _mm256_hsub_epi16(a, b);
4516	let e = _mm256_set1_epi16(`0`);
4517	assert_eq_m256i(r, e);
4518	}
4519
4520	#[simd_test(enable = "avx2")]
4521	unsafe fn test_mm256_hsub_epi32() {
4522	let a = _mm256_set1_epi32(`2`);
4523	let b = _mm256_set1_epi32(`4`);
4524	let r = _mm256_hsub_epi32(a, b);
4525	let e = _mm256_set1_epi32(`0`);
4526	assert_eq_m256i(r, e);
4527	}
4528
4529	#[simd_test(enable = "avx2")]
4530	unsafe fn test_mm256_hsubs_epi16() {
4531	let a = _mm256_set1_epi16(`2`);
4532	let a = _mm256_insert_epi16::<`0`>(a, `0x7fff`);
4533	let a = _mm256_insert_epi16::<`1`>(a, `-1`);
4534	let b = _mm256_set1_epi16(`4`);
4535	let r = _mm256_hsubs_epi16(a, b);
4536	let e = _mm256_insert_epi16::<`0`>(_mm256_set1_epi16(`0`), `0x7FFF`);
4537	assert_eq_m256i(r, e);
4538	}
4539
4540	#[simd_test(enable = "avx2")]
4541	unsafe fn test_mm256_madd_epi16() {
4542	let a = _mm256_set1_epi16(`2`);
4543	let b = _mm256_set1_epi16(`4`);
4544	let r = _mm256_madd_epi16(a, b);
4545	let e = _mm256_set1_epi32(`16`);
4546	assert_eq_m256i(r, e);
4547	}
4548
4549	#[simd_test(enable = "avx2")]
4550	unsafe fn test_mm256_inserti128_si256() {
4551	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
4552	let b = _mm_setr_epi64x(`7`, `8`);
4553	let r = _mm256_inserti128_si256::<`1`>(a, b);
4554	let e = _mm256_setr_epi64x(`1`, `2`, `7`, `8`);
4555	assert_eq_m256i(r, e);
4556	}
4557
4558	#[simd_test(enable = "avx2")]
4559	unsafe fn test_mm256_maddubs_epi16() {
4560	let a = _mm256_set1_epi8(`2`);
4561	let b = _mm256_set1_epi8(`4`);
4562	let r = _mm256_maddubs_epi16(a, b);
4563	let e = _mm256_set1_epi16(`16`);
4564	assert_eq_m256i(r, e);
4565	}
4566
4567	#[simd_test(enable = "avx2")]
4568	unsafe fn test_mm_maskload_epi32() {
4569	let nums = [`1`, `2`, `3`, `4`];
4570	let a = &nums as *const i32;
4571	let mask = _mm_setr_epi32(`-1`, `0`, `0`, `-1`);
4572	let r = _mm_maskload_epi32(a, mask);
4573	let e = _mm_setr_epi32(`1`, `0`, `0`, `4`);
4574	assert_eq_m128i(r, e);
4575	}
4576
4577	#[simd_test(enable = "avx2")]
4578	unsafe fn test_mm256_maskload_epi32() {
4579	let nums = [`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`];
4580	let a = &nums as *const i32;
4581	let mask = _mm256_setr_epi32(`-1`, `0`, `0`, `-1`, `0`, `-1`, `-1`, `0`);
4582	let r = _mm256_maskload_epi32(a, mask);
4583	let e = _mm256_setr_epi32(`1`, `0`, `0`, `4`, `0`, `6`, `7`, `0`);
4584	assert_eq_m256i(r, e);
4585	}
4586
4587	#[simd_test(enable = "avx2")]
4588	unsafe fn test_mm_maskload_epi64() {
4589	let nums = [`1_i64`, `2_i64`];
4590	let a = &nums as *const i64;
4591	let mask = _mm_setr_epi64x(`0`, `-1`);
4592	let r = _mm_maskload_epi64(a, mask);
4593	let e = _mm_setr_epi64x(`0`, `2`);
4594	assert_eq_m128i(r, e);
4595	}
4596
4597	#[simd_test(enable = "avx2")]
4598	unsafe fn test_mm256_maskload_epi64() {
4599	let nums = [`1_i64`, `2_i64`, `3_i64`, `4_i64`];
4600	let a = &nums as *const i64;
4601	let mask = _mm256_setr_epi64x(`0`, `-1`, `-1`, `0`);
4602	let r = _mm256_maskload_epi64(a, mask);
4603	let e = _mm256_setr_epi64x(`0`, `2`, `3`, `0`);
4604	assert_eq_m256i(r, e);
4605	}
4606
4607	#[simd_test(enable = "avx2")]
4608	unsafe fn test_mm_maskstore_epi32() {
4609	let a = _mm_setr_epi32(`1`, `2`, `3`, `4`);
4610	let mut arr = [`-1`, `-1`, `-1`, `-1`];
4611	let mask = _mm_setr_epi32(`-1`, `0`, `0`, `-1`);
4612	_mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4613	let e = [`1`, `-1`, `-1`, `4`];
4614	assert_eq!(arr, e);
4615	}
4616
4617	#[simd_test(enable = "avx2")]
4618	unsafe fn test_mm256_maskstore_epi32() {
4619	let a = _mm256_setr_epi32(`1`, `0x6d726f`, `3`, `42`, `0x777161`, `6`, `7`, `8`);
4620	let mut arr = [`-1`, `-1`, `-1`, `0x776173`, `-1`, `0x68657265`, `-1`, `-1`];
4621	let mask = _mm256_setr_epi32(`-1`, `0`, `0`, `-1`, `0`, `-1`, `-1`, `0`);
4622	_mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4623	let e = [`1`, `-1`, `-1`, `42`, `-1`, `6`, `7`, `-1`];
4624	assert_eq!(arr, e);
4625	}
4626
4627	#[simd_test(enable = "avx2")]
4628	unsafe fn test_mm_maskstore_epi64() {
4629	let a = _mm_setr_epi64x(`1_i64`, `2_i64`);
4630	let mut arr = [`-1_i64`, `-1_i64`];
4631	let mask = _mm_setr_epi64x(`0`, `-1`);
4632	_mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4633	let e = [`-1`, `2`];
4634	assert_eq!(arr, e);
4635	}
4636
4637	#[simd_test(enable = "avx2")]
4638	unsafe fn test_mm256_maskstore_epi64() {
4639	let a = _mm256_setr_epi64x(`1_i64`, `2_i64`, `3_i64`, `4_i64`);
4640	let mut arr = [`-1_i64`, `-1_i64`, `-1_i64`, `-1_i64`];
4641	let mask = _mm256_setr_epi64x(`0`, `-1`, `-1`, `0`);
4642	_mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4643	let e = [`-1`, `2`, `3`, `-1`];
4644	assert_eq!(arr, e);
4645	}
4646
4647	#[simd_test(enable = "avx2")]
4648	unsafe fn test_mm256_max_epi16() {
4649	let a = _mm256_set1_epi16(`2`);
4650	let b = _mm256_set1_epi16(`4`);
4651	let r = _mm256_max_epi16(a, b);
4652	assert_eq_m256i(r, b);
4653	}
4654
4655	#[simd_test(enable = "avx2")]
4656	unsafe fn test_mm256_max_epi32() {
4657	let a = _mm256_set1_epi32(`2`);
4658	let b = _mm256_set1_epi32(`4`);
4659	let r = _mm256_max_epi32(a, b);
4660	assert_eq_m256i(r, b);
4661	}
4662
4663	#[simd_test(enable = "avx2")]
4664	unsafe fn test_mm256_max_epi8() {
4665	let a = _mm256_set1_epi8(`2`);
4666	let b = _mm256_set1_epi8(`4`);
4667	let r = _mm256_max_epi8(a, b);
4668	assert_eq_m256i(r, b);
4669	}
4670
4671	#[simd_test(enable = "avx2")]
4672	unsafe fn test_mm256_max_epu16() {
4673	let a = _mm256_set1_epi16(`2`);
4674	let b = _mm256_set1_epi16(`4`);
4675	let r = _mm256_max_epu16(a, b);
4676	assert_eq_m256i(r, b);
4677	}
4678
4679	#[simd_test(enable = "avx2")]
4680	unsafe fn test_mm256_max_epu32() {
4681	let a = _mm256_set1_epi32(`2`);
4682	let b = _mm256_set1_epi32(`4`);
4683	let r = _mm256_max_epu32(a, b);
4684	assert_eq_m256i(r, b);
4685	}
4686
4687	#[simd_test(enable = "avx2")]
4688	unsafe fn test_mm256_max_epu8() {
4689	let a = _mm256_set1_epi8(`2`);
4690	let b = _mm256_set1_epi8(`4`);
4691	let r = _mm256_max_epu8(a, b);
4692	assert_eq_m256i(r, b);
4693	}
4694
4695	#[simd_test(enable = "avx2")]
4696	unsafe fn test_mm256_min_epi16() {
4697	let a = _mm256_set1_epi16(`2`);
4698	let b = _mm256_set1_epi16(`4`);
4699	let r = _mm256_min_epi16(a, b);
4700	assert_eq_m256i(r, a);
4701	}
4702
4703	#[simd_test(enable = "avx2")]
4704	unsafe fn test_mm256_min_epi32() {
4705	let a = _mm256_set1_epi32(`2`);
4706	let b = _mm256_set1_epi32(`4`);
4707	let r = _mm256_min_epi32(a, b);
4708	assert_eq_m256i(r, a);
4709	}
4710
4711	#[simd_test(enable = "avx2")]
4712	unsafe fn test_mm256_min_epi8() {
4713	let a = _mm256_set1_epi8(`2`);
4714	let b = _mm256_set1_epi8(`4`);
4715	let r = _mm256_min_epi8(a, b);
4716	assert_eq_m256i(r, a);
4717	}
4718
4719	#[simd_test(enable = "avx2")]
4720	unsafe fn test_mm256_min_epu16() {
4721	let a = _mm256_set1_epi16(`2`);
4722	let b = _mm256_set1_epi16(`4`);
4723	let r = _mm256_min_epu16(a, b);
4724	assert_eq_m256i(r, a);
4725	}
4726
4727	#[simd_test(enable = "avx2")]
4728	unsafe fn test_mm256_min_epu32() {
4729	let a = _mm256_set1_epi32(`2`);
4730	let b = _mm256_set1_epi32(`4`);
4731	let r = _mm256_min_epu32(a, b);
4732	assert_eq_m256i(r, a);
4733	}
4734
4735	#[simd_test(enable = "avx2")]
4736	unsafe fn test_mm256_min_epu8() {
4737	let a = _mm256_set1_epi8(`2`);
4738	let b = _mm256_set1_epi8(`4`);
4739	let r = _mm256_min_epu8(a, b);
4740	assert_eq_m256i(r, a);
4741	}
4742
4743	#[simd_test(enable = "avx2")]
4744	unsafe fn test_mm256_movemask_epi8() {
4745	let a = _mm256_set1_epi8(`-1`);
4746	let r = _mm256_movemask_epi8(a);
4747	let e = `-1`;
4748	assert_eq!(r, e);
4749	}
4750
4751	#[simd_test(enable = "avx2")]
4752	unsafe fn test_mm256_mpsadbw_epu8() {
4753	let a = _mm256_set1_epi8(`2`);
4754	let b = _mm256_set1_epi8(`4`);
4755	let r = _mm256_mpsadbw_epu8::<`0`>(a, b);
4756	let e = _mm256_set1_epi16(`8`);
4757	assert_eq_m256i(r, e);
4758	}
4759
4760	#[simd_test(enable = "avx2")]
4761	unsafe fn test_mm256_mul_epi32() {
4762	let a = _mm256_setr_epi32(`0`, `0`, `0`, `0`, `2`, `2`, `2`, `2`);
4763	let b = _mm256_setr_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
4764	let r = _mm256_mul_epi32(a, b);
4765	let e = _mm256_setr_epi64x(`0`, `0`, `10`, `14`);
4766	assert_eq_m256i(r, e);
4767	}
4768
4769	#[simd_test(enable = "avx2")]
4770	unsafe fn test_mm256_mul_epu32() {
4771	let a = _mm256_setr_epi32(`0`, `0`, `0`, `0`, `2`, `2`, `2`, `2`);
4772	let b = _mm256_setr_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
4773	let r = _mm256_mul_epu32(a, b);
4774	let e = _mm256_setr_epi64x(`0`, `0`, `10`, `14`);
4775	assert_eq_m256i(r, e);
4776	}
4777
4778	#[simd_test(enable = "avx2")]
4779	unsafe fn test_mm256_mulhi_epi16() {
4780	let a = _mm256_set1_epi16(`6535`);
4781	let b = _mm256_set1_epi16(`6535`);
4782	let r = _mm256_mulhi_epi16(a, b);
4783	let e = _mm256_set1_epi16(`651`);
4784	assert_eq_m256i(r, e);
4785	}
4786
4787	#[simd_test(enable = "avx2")]
4788	unsafe fn test_mm256_mulhi_epu16() {
4789	let a = _mm256_set1_epi16(`6535`);
4790	let b = _mm256_set1_epi16(`6535`);
4791	let r = _mm256_mulhi_epu16(a, b);
4792	let e = _mm256_set1_epi16(`651`);
4793	assert_eq_m256i(r, e);
4794	}
4795
4796	#[simd_test(enable = "avx2")]
4797	unsafe fn test_mm256_mullo_epi16() {
4798	let a = _mm256_set1_epi16(`2`);
4799	let b = _mm256_set1_epi16(`4`);
4800	let r = _mm256_mullo_epi16(a, b);
4801	let e = _mm256_set1_epi16(`8`);
4802	assert_eq_m256i(r, e);
4803	}
4804
4805	#[simd_test(enable = "avx2")]
4806	unsafe fn test_mm256_mullo_epi32() {
4807	let a = _mm256_set1_epi32(`2`);
4808	let b = _mm256_set1_epi32(`4`);
4809	let r = _mm256_mullo_epi32(a, b);
4810	let e = _mm256_set1_epi32(`8`);
4811	assert_eq_m256i(r, e);
4812	}
4813
4814	#[simd_test(enable = "avx2")]
4815	unsafe fn test_mm256_mulhrs_epi16() {
4816	let a = _mm256_set1_epi16(`2`);
4817	let b = _mm256_set1_epi16(`4`);
4818	let r = _mm256_mullo_epi16(a, b);
4819	let e = _mm256_set1_epi16(`8`);
4820	assert_eq_m256i(r, e);
4821	}
4822
4823	#[simd_test(enable = "avx2")]
4824	unsafe fn test_mm256_or_si256() {
4825	let a = _mm256_set1_epi8(`-1`);
4826	let b = _mm256_set1_epi8(`0`);
4827	let r = _mm256_or_si256(a, b);
4828	assert_eq_m256i(r, a);
4829	}
4830
4831	#[simd_test(enable = "avx2")]
4832	unsafe fn test_mm256_packs_epi16() {
4833	let a = _mm256_set1_epi16(`2`);
4834	let b = _mm256_set1_epi16(`4`);
4835	let r = _mm256_packs_epi16(a, b);
4836	#[rustfmt::skip]
4837	let e = _mm256_setr_epi8(
4838	`2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
4839	`4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
4840	`2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
4841	`4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
4842	);
4843
4844	assert_eq_m256i(r, e);
4845	}
4846
4847	#[simd_test(enable = "avx2")]
4848	unsafe fn test_mm256_packs_epi32() {
4849	let a = _mm256_set1_epi32(`2`);
4850	let b = _mm256_set1_epi32(`4`);
4851	let r = _mm256_packs_epi32(a, b);
4852	let e = _mm256_setr_epi16(`2`, `2`, `2`, `2`, `4`, `4`, `4`, `4`, `2`, `2`, `2`, `2`, `4`, `4`, `4`, `4`);
4853
4854	assert_eq_m256i(r, e);
4855	}
4856
4857	#[simd_test(enable = "avx2")]
4858	unsafe fn test_mm256_packus_epi16() {
4859	let a = _mm256_set1_epi16(`2`);
4860	let b = _mm256_set1_epi16(`4`);
4861	let r = _mm256_packus_epi16(a, b);
4862	#[rustfmt::skip]
4863	let e = _mm256_setr_epi8(
4864	`2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
4865	`4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
4866	`2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
4867	`4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
4868	);
4869
4870	assert_eq_m256i(r, e);
4871	}
4872
4873	#[simd_test(enable = "avx2")]
4874	unsafe fn test_mm256_packus_epi32() {
4875	let a = _mm256_set1_epi32(`2`);
4876	let b = _mm256_set1_epi32(`4`);
4877	let r = _mm256_packus_epi32(a, b);
4878	let e = _mm256_setr_epi16(`2`, `2`, `2`, `2`, `4`, `4`, `4`, `4`, `2`, `2`, `2`, `2`, `4`, `4`, `4`, `4`);
4879
4880	assert_eq_m256i(r, e);
4881	}
4882
4883	#[simd_test(enable = "avx2")]
4884	unsafe fn test_mm256_sad_epu8() {
4885	let a = _mm256_set1_epi8(`2`);
4886	let b = _mm256_set1_epi8(`4`);
4887	let r = _mm256_sad_epu8(a, b);
4888	let e = _mm256_set1_epi64x(`16`);
4889	assert_eq_m256i(r, e);
4890	}
4891
4892	#[simd_test(enable = "avx2")]
4893	unsafe fn test_mm256_shufflehi_epi16() {
4894	#[rustfmt::skip]
4895	let a = _mm256_setr_epi16(
4896	`0`, `1`, `2`, `3`, `11`, `22`, `33`, `44`,
4897	`4`, `5`, `6`, `7`, `55`, `66`, `77`, `88`,
4898	);
4899	#[rustfmt::skip]
4900	let e = _mm256_setr_epi16(
4901	`0`, `1`, `2`, `3`, `44`, `22`, `22`, `11`,
4902	`4`, `5`, `6`, `7`, `88`, `66`, `66`, `55`,
4903	);
4904	let r = _mm256_shufflehi_epi16::<`0b00_01_01_11`>(a);
4905	assert_eq_m256i(r, e);
4906	}
4907
4908	#[simd_test(enable = "avx2")]
4909	unsafe fn test_mm256_shufflelo_epi16() {
4910	#[rustfmt::skip]
4911	let a = _mm256_setr_epi16(
4912	`11`, `22`, `33`, `44`, `0`, `1`, `2`, `3`,
4913	`55`, `66`, `77`, `88`, `4`, `5`, `6`, `7`,
4914	);
4915	#[rustfmt::skip]
4916	let e = _mm256_setr_epi16(
4917	`44`, `22`, `22`, `11`, `0`, `1`, `2`, `3`,
4918	`88`, `66`, `66`, `55`, `4`, `5`, `6`, `7`,
4919	);
4920	let r = _mm256_shufflelo_epi16::<`0b00_01_01_11`>(a);
4921	assert_eq_m256i(r, e);
4922	}
4923
4924	#[simd_test(enable = "avx2")]
4925	unsafe fn test_mm256_sign_epi16() {
4926	let a = _mm256_set1_epi16(`2`);
4927	let b = _mm256_set1_epi16(`-1`);
4928	let r = _mm256_sign_epi16(a, b);
4929	let e = _mm256_set1_epi16(`-2`);
4930	assert_eq_m256i(r, e);
4931	}
4932
4933	#[simd_test(enable = "avx2")]
4934	unsafe fn test_mm256_sign_epi32() {
4935	let a = _mm256_set1_epi32(`2`);
4936	let b = _mm256_set1_epi32(`-1`);
4937	let r = _mm256_sign_epi32(a, b);
4938	let e = _mm256_set1_epi32(`-2`);
4939	assert_eq_m256i(r, e);
4940	}
4941
4942	#[simd_test(enable = "avx2")]
4943	unsafe fn test_mm256_sign_epi8() {
4944	let a = _mm256_set1_epi8(`2`);
4945	let b = _mm256_set1_epi8(`-1`);
4946	let r = _mm256_sign_epi8(a, b);
4947	let e = _mm256_set1_epi8(`-2`);
4948	assert_eq_m256i(r, e);
4949	}
4950
4951	#[simd_test(enable = "avx2")]
4952	unsafe fn test_mm256_sll_epi16() {
4953	let a = _mm256_set1_epi16(`0xFF`);
4954	let b = _mm_insert_epi16::<`0`>(_mm_set1_epi16(`0`), `4`);
4955	let r = _mm256_sll_epi16(a, b);
4956	assert_eq_m256i(r, _mm256_set1_epi16(`0xFF0`));
4957	}
4958
4959	#[simd_test(enable = "avx2")]
4960	unsafe fn test_mm256_sll_epi32() {
4961	let a = _mm256_set1_epi32(`0xFFFF`);
4962	let b = _mm_insert_epi32::<`0`>(_mm_set1_epi32(`0`), `4`);
4963	let r = _mm256_sll_epi32(a, b);
4964	assert_eq_m256i(r, _mm256_set1_epi32(`0xFFFF0`));
4965	}
4966
4967	#[simd_test(enable = "avx2")]
4968	unsafe fn test_mm256_sll_epi64() {
4969	let a = _mm256_set1_epi64x(`0xFFFFFFFF`);
4970	let b = _mm_insert_epi64::<`0`>(_mm_set1_epi64x(`0`), `4`);
4971	let r = _mm256_sll_epi64(a, b);
4972	assert_eq_m256i(r, _mm256_set1_epi64x(`0xFFFFFFFF0`));
4973	}
4974
4975	#[simd_test(enable = "avx2")]
4976	unsafe fn test_mm256_slli_epi16() {
4977	assert_eq_m256i(
4978	_mm256_slli_epi16::<`4`>(_mm256_set1_epi16(`0xFF`)),
4979	_mm256_set1_epi16(`0xFF0`),
4980	);
4981	}
4982
4983	#[simd_test(enable = "avx2")]
4984	unsafe fn test_mm256_slli_epi32() {
4985	assert_eq_m256i(
4986	_mm256_slli_epi32::<`4`>(_mm256_set1_epi32(`0xFFFF`)),
4987	_mm256_set1_epi32(`0xFFFF0`),
4988	);
4989	}
4990
4991	#[simd_test(enable = "avx2")]
4992	unsafe fn test_mm256_slli_epi64() {
4993	assert_eq_m256i(
4994	_mm256_slli_epi64::<`4`>(_mm256_set1_epi64x(`0xFFFFFFFF`)),
4995	_mm256_set1_epi64x(`0xFFFFFFFF0`),
4996	);
4997	}
4998
4999	#[simd_test(enable = "avx2")]
5000	unsafe fn test_mm256_slli_si256() {
5001	let a = _mm256_set1_epi64x(`0xFFFFFFFF`);
5002	let r = _mm256_slli_si256::<`3`>(a);
5003	assert_eq_m256i(r, _mm256_set1_epi64x(`0xFFFFFFFF000000`));
5004	}
5005
5006	#[simd_test(enable = "avx2")]
5007	unsafe fn test_mm_sllv_epi32() {
5008	let a = _mm_set1_epi32(`2`);
5009	let b = _mm_set1_epi32(`1`);
5010	let r = _mm_sllv_epi32(a, b);
5011	let e = _mm_set1_epi32(`4`);
5012	assert_eq_m128i(r, e);
5013	}
5014
5015	#[simd_test(enable = "avx2")]
5016	unsafe fn test_mm256_sllv_epi32() {
5017	let a = _mm256_set1_epi32(`2`);
5018	let b = _mm256_set1_epi32(`1`);
5019	let r = _mm256_sllv_epi32(a, b);
5020	let e = _mm256_set1_epi32(`4`);
5021	assert_eq_m256i(r, e);
5022	}
5023
5024	#[simd_test(enable = "avx2")]
5025	unsafe fn test_mm_sllv_epi64() {
5026	let a = _mm_set1_epi64x(`2`);
5027	let b = _mm_set1_epi64x(`1`);
5028	let r = _mm_sllv_epi64(a, b);
5029	let e = _mm_set1_epi64x(`4`);
5030	assert_eq_m128i(r, e);
5031	}
5032
5033	#[simd_test(enable = "avx2")]
5034	unsafe fn test_mm256_sllv_epi64() {
5035	let a = _mm256_set1_epi64x(`2`);
5036	let b = _mm256_set1_epi64x(`1`);
5037	let r = _mm256_sllv_epi64(a, b);
5038	let e = _mm256_set1_epi64x(`4`);
5039	assert_eq_m256i(r, e);
5040	}
5041
5042	#[simd_test(enable = "avx2")]
5043	unsafe fn test_mm256_sra_epi16() {
5044	let a = _mm256_set1_epi16(`-1`);
5045	let b = _mm_setr_epi16(`1`, `0`, `0`, `0`, `0`, `0`, `0`, `0`);
5046	let r = _mm256_sra_epi16(a, b);
5047	assert_eq_m256i(r, _mm256_set1_epi16(`-1`));
5048	}
5049
5050	#[simd_test(enable = "avx2")]
5051	unsafe fn test_mm256_sra_epi32() {
5052	let a = _mm256_set1_epi32(`-1`);
5053	let b = _mm_insert_epi32::<`0`>(_mm_set1_epi32(`0`), `1`);
5054	let r = _mm256_sra_epi32(a, b);
5055	assert_eq_m256i(r, _mm256_set1_epi32(`-1`));
5056	}
5057
5058	#[simd_test(enable = "avx2")]
5059	unsafe fn test_mm256_srai_epi16() {
5060	assert_eq_m256i(
5061	_mm256_srai_epi16::<`1`>(_mm256_set1_epi16(`-1`)),
5062	_mm256_set1_epi16(`-1`),
5063	);
5064	}
5065
5066	#[simd_test(enable = "avx2")]
5067	unsafe fn test_mm256_srai_epi32() {
5068	assert_eq_m256i(
5069	_mm256_srai_epi32::<`1`>(_mm256_set1_epi32(`-1`)),
5070	_mm256_set1_epi32(`-1`),
5071	);
5072	}
5073
5074	#[simd_test(enable = "avx2")]
5075	unsafe fn test_mm_srav_epi32() {
5076	let a = _mm_set1_epi32(`4`);
5077	let count = _mm_set1_epi32(`1`);
5078	let r = _mm_srav_epi32(a, count);
5079	let e = _mm_set1_epi32(`2`);
5080	assert_eq_m128i(r, e);
5081	}
5082
5083	#[simd_test(enable = "avx2")]
5084	unsafe fn test_mm256_srav_epi32() {
5085	let a = _mm256_set1_epi32(`4`);
5086	let count = _mm256_set1_epi32(`1`);
5087	let r = _mm256_srav_epi32(a, count);
5088	let e = _mm256_set1_epi32(`2`);
5089	assert_eq_m256i(r, e);
5090	}
5091
5092	#[simd_test(enable = "avx2")]
5093	unsafe fn test_mm256_srli_si256() {
5094	#[rustfmt::skip]
5095	let a = _mm256_setr_epi8(
5096	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
5097	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
5098	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
5099	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
5100	);
5101	let r = _mm256_srli_si256::<`3`>(a);
5102	#[rustfmt::skip]
5103	let e = _mm256_setr_epi8(
5104	`4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`,
5105	`12`, `13`, `14`, `15`, `16`, `0`, `0`, `0`,
5106	`20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`,
5107	`28`, `29`, `30`, `31`, `32`, `0`, `0`, `0`,
5108	);
5109	assert_eq_m256i(r, e);
5110	}
5111
5112	#[simd_test(enable = "avx2")]
5113	unsafe fn test_mm256_srl_epi16() {
5114	let a = _mm256_set1_epi16(`0xFF`);
5115	let b = _mm_insert_epi16::<`0`>(_mm_set1_epi16(`0`), `4`);
5116	let r = _mm256_srl_epi16(a, b);
5117	assert_eq_m256i(r, _mm256_set1_epi16(`0xF`));
5118	}
5119
5120	#[simd_test(enable = "avx2")]
5121	unsafe fn test_mm256_srl_epi32() {
5122	let a = _mm256_set1_epi32(`0xFFFF`);
5123	let b = _mm_insert_epi32::<`0`>(_mm_set1_epi32(`0`), `4`);
5124	let r = _mm256_srl_epi32(a, b);
5125	assert_eq_m256i(r, _mm256_set1_epi32(`0xFFF`));
5126	}
5127
5128	#[simd_test(enable = "avx2")]
5129	unsafe fn test_mm256_srl_epi64() {
5130	let a = _mm256_set1_epi64x(`0xFFFFFFFF`);
5131	let b = _mm_setr_epi64x(`4`, `0`);
5132	let r = _mm256_srl_epi64(a, b);
5133	assert_eq_m256i(r, _mm256_set1_epi64x(`0xFFFFFFF`));
5134	}
5135
5136	#[simd_test(enable = "avx2")]
5137	unsafe fn test_mm256_srli_epi16() {
5138	assert_eq_m256i(
5139	_mm256_srli_epi16::<`4`>(_mm256_set1_epi16(`0xFF`)),
5140	_mm256_set1_epi16(`0xF`),
5141	);
5142	}
5143
5144	#[simd_test(enable = "avx2")]
5145	unsafe fn test_mm256_srli_epi32() {
5146	assert_eq_m256i(
5147	_mm256_srli_epi32::<`4`>(_mm256_set1_epi32(`0xFFFF`)),
5148	_mm256_set1_epi32(`0xFFF`),
5149	);
5150	}
5151
5152	#[simd_test(enable = "avx2")]
5153	unsafe fn test_mm256_srli_epi64() {
5154	assert_eq_m256i(
5155	_mm256_srli_epi64::<`4`>(_mm256_set1_epi64x(`0xFFFFFFFF`)),
5156	_mm256_set1_epi64x(`0xFFFFFFF`),
5157	);
5158	}
5159
5160	#[simd_test(enable = "avx2")]
5161	unsafe fn test_mm_srlv_epi32() {
5162	let a = _mm_set1_epi32(`2`);
5163	let count = _mm_set1_epi32(`1`);
5164	let r = _mm_srlv_epi32(a, count);
5165	let e = _mm_set1_epi32(`1`);
5166	assert_eq_m128i(r, e);
5167	}
5168
5169	#[simd_test(enable = "avx2")]
5170	unsafe fn test_mm256_srlv_epi32() {
5171	let a = _mm256_set1_epi32(`2`);
5172	let count = _mm256_set1_epi32(`1`);
5173	let r = _mm256_srlv_epi32(a, count);
5174	let e = _mm256_set1_epi32(`1`);
5175	assert_eq_m256i(r, e);
5176	}
5177
5178	#[simd_test(enable = "avx2")]
5179	unsafe fn test_mm_srlv_epi64() {
5180	let a = _mm_set1_epi64x(`2`);
5181	let count = _mm_set1_epi64x(`1`);
5182	let r = _mm_srlv_epi64(a, count);
5183	let e = _mm_set1_epi64x(`1`);
5184	assert_eq_m128i(r, e);
5185	}
5186
5187	#[simd_test(enable = "avx2")]
5188	unsafe fn test_mm256_srlv_epi64() {
5189	let a = _mm256_set1_epi64x(`2`);
5190	let count = _mm256_set1_epi64x(`1`);
5191	let r = _mm256_srlv_epi64(a, count);
5192	let e = _mm256_set1_epi64x(`1`);
5193	assert_eq_m256i(r, e);
5194	}
5195
5196	#[simd_test(enable = "avx2")]
5197	unsafe fn test_mm256_sub_epi16() {
5198	let a = _mm256_set1_epi16(`4`);
5199	let b = _mm256_set1_epi16(`2`);
5200	let r = _mm256_sub_epi16(a, b);
5201	assert_eq_m256i(r, b);
5202	}
5203
5204	#[simd_test(enable = "avx2")]
5205	unsafe fn test_mm256_sub_epi32() {
5206	let a = _mm256_set1_epi32(`4`);
5207	let b = _mm256_set1_epi32(`2`);
5208	let r = _mm256_sub_epi32(a, b);
5209	assert_eq_m256i(r, b);
5210	}
5211
5212	#[simd_test(enable = "avx2")]
5213	unsafe fn test_mm256_sub_epi64() {
5214	let a = _mm256_set1_epi64x(`4`);
5215	let b = _mm256_set1_epi64x(`2`);
5216	let r = _mm256_sub_epi64(a, b);
5217	assert_eq_m256i(r, b);
5218	}
5219
5220	#[simd_test(enable = "avx2")]
5221	unsafe fn test_mm256_sub_epi8() {
5222	let a = _mm256_set1_epi8(`4`);
5223	let b = _mm256_set1_epi8(`2`);
5224	let r = _mm256_sub_epi8(a, b);
5225	assert_eq_m256i(r, b);
5226	}
5227
5228	#[simd_test(enable = "avx2")]
5229	unsafe fn test_mm256_subs_epi16() {
5230	let a = _mm256_set1_epi16(`4`);
5231	let b = _mm256_set1_epi16(`2`);
5232	let r = _mm256_subs_epi16(a, b);
5233	assert_eq_m256i(r, b);
5234	}
5235
5236	#[simd_test(enable = "avx2")]
5237	unsafe fn test_mm256_subs_epi8() {
5238	let a = _mm256_set1_epi8(`4`);
5239	let b = _mm256_set1_epi8(`2`);
5240	let r = _mm256_subs_epi8(a, b);
5241	assert_eq_m256i(r, b);
5242	}
5243
5244	#[simd_test(enable = "avx2")]
5245	unsafe fn test_mm256_subs_epu16() {
5246	let a = _mm256_set1_epi16(`4`);
5247	let b = _mm256_set1_epi16(`2`);
5248	let r = _mm256_subs_epu16(a, b);
5249	assert_eq_m256i(r, b);
5250	}
5251
5252	#[simd_test(enable = "avx2")]
5253	unsafe fn test_mm256_subs_epu8() {
5254	let a = _mm256_set1_epi8(`4`);
5255	let b = _mm256_set1_epi8(`2`);
5256	let r = _mm256_subs_epu8(a, b);
5257	assert_eq_m256i(r, b);
5258	}
5259
5260	#[simd_test(enable = "avx2")]
5261	unsafe fn test_mm256_xor_si256() {
5262	let a = _mm256_set1_epi8(`5`);
5263	let b = _mm256_set1_epi8(`3`);
5264	let r = _mm256_xor_si256(a, b);
5265	assert_eq_m256i(r, _mm256_set1_epi8(`6`));
5266	}
5267
5268	#[simd_test(enable = "avx2")]
5269	unsafe fn test_mm256_alignr_epi8() {
5270	#[rustfmt::skip]
5271	let a = _mm256_setr_epi8(
5272	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
5273	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
5274	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
5275	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
5276	);
5277	#[rustfmt::skip]
5278	let b = _mm256_setr_epi8(
5279	`-1`, `-2`, `-3`, `-4`, `-5`, `-6`, `-7`, `-8`,
5280	`-9`, `-10`, `-11`, `-12`, `-13`, `-14`, `-15`, `-16`,
5281	`-17`, `-18`, `-19`, `-20`, `-21`, `-22`, `-23`, `-24`,
5282	`-25`, `-26`, `-27`, `-28`, `-29`, `-30`, `-31`, `-32`,
5283	);
5284	let r = _mm256_alignr_epi8::<`33`>(a, b);
5285	assert_eq_m256i(r, _mm256_set1_epi8(`0`));
5286
5287	let r = _mm256_alignr_epi8::<`17`>(a, b);
5288	#[rustfmt::skip]
5289	let expected = _mm256_setr_epi8(
5290	`2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`,
5291	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `0`,
5292	`18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`,
5293	`26`, `27`, `28`, `29`, `30`, `31`, `32`, `0`,
5294	);
5295	assert_eq_m256i(r, expected);
5296
5297	let r = _mm256_alignr_epi8::<`4`>(a, b);
5298	#[rustfmt::skip]
5299	let expected = _mm256_setr_epi8(
5300	`-5`, `-6`, `-7`, `-8`, `-9`, `-10`, `-11`, `-12`,
5301	`-13`, `-14`, `-15`, `-16`, `1`, `2`, `3`, `4`,
5302	`-21`, `-22`, `-23`, `-24`, `-25`, `-26`, `-27`, `-28`,
5303	`-29`, `-30`, `-31`, `-32`, `17`, `18`, `19`, `20`,
5304	);
5305	assert_eq_m256i(r, expected);
5306
5307	#[rustfmt::skip]
5308	let expected = _mm256_setr_epi8(
5309	`-1`, `-2`, `-3`, `-4`, `-5`, `-6`, `-7`, `-8`,
5310	`-9`, `-10`, `-11`, `-12`, `-13`, `-14`, `-15`, `-16`, `-17`,
5311	`-18`, `-19`, `-20`, `-21`, `-22`, `-23`, `-24`, `-25`,
5312	`-26`, `-27`, `-28`, `-29`, `-30`, `-31`, `-32`,
5313	);
5314	let r = _mm256_alignr_epi8::<`16`>(a, b);
5315	assert_eq_m256i(r, expected);
5316
5317	let r = _mm256_alignr_epi8::<`15`>(a, b);
5318	#[rustfmt::skip]
5319	let expected = _mm256_setr_epi8(
5320	`-16`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
5321	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
5322	`-32`, `17`, `18`, `19`, `20`, `21`, `22`, `23`,
5323	`24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
5324	);
5325	assert_eq_m256i(r, expected);
5326
5327	let r = _mm256_alignr_epi8::<`0`>(a, b);
5328	assert_eq_m256i(r, b);
5329	}
5330
5331	#[simd_test(enable = "avx2")]
5332	unsafe fn test_mm256_shuffle_epi8() {
5333	#[rustfmt::skip]
5334	let a = _mm256_setr_epi8(
5335	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
5336	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
5337	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
5338	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
5339	);
5340	#[rustfmt::skip]
5341	let b = _mm256_setr_epi8(
5342	`4`, `128u8` as i8, `4`, `3`, `24`, `12`, `6`, `19`,
5343	`12`, `5`, `5`, `10`, `4`, `1`, `8`, `0`,
5344	`4`, `128u8` as i8, `4`, `3`, `24`, `12`, `6`, `19`,
5345	`12`, `5`, `5`, `10`, `4`, `1`, `8`, `0`,
5346	);
5347	#[rustfmt::skip]
5348	let expected = _mm256_setr_epi8(
5349	`5`, `0`, `5`, `4`, `9`, `13`, `7`, `4`,
5350	`13`, `6`, `6`, `11`, `5`, `2`, `9`, `1`,
5351	`21`, `0`, `21`, `20`, `25`, `29`, `23`, `20`,
5352	`29`, `22`, `22`, `27`, `21`, `18`, `25`, `17`,
5353	);
5354	let r = _mm256_shuffle_epi8(a, b);
5355	assert_eq_m256i(r, expected);
5356	}
5357
5358	#[simd_test(enable = "avx2")]
5359	unsafe fn test_mm256_permutevar8x32_epi32() {
5360	let a = _mm256_setr_epi32(`100`, `200`, `300`, `400`, `500`, `600`, `700`, `800`);
5361	let b = _mm256_setr_epi32(`5`, `0`, `5`, `1`, `7`, `6`, `3`, `4`);
5362	let expected = _mm256_setr_epi32(`600`, `100`, `600`, `200`, `800`, `700`, `400`, `500`);
5363	let r = _mm256_permutevar8x32_epi32(a, b);
5364	assert_eq_m256i(r, expected);
5365	}
5366
5367	#[simd_test(enable = "avx2")]
5368	unsafe fn test_mm256_permute4x64_epi64() {
5369	let a = _mm256_setr_epi64x(`100`, `200`, `300`, `400`);
5370	let expected = _mm256_setr_epi64x(`400`, `100`, `200`, `100`);
5371	let r = _mm256_permute4x64_epi64::<`0b00010011`>(a);
5372	assert_eq_m256i(r, expected);
5373	}
5374
5375	#[simd_test(enable = "avx2")]
5376	unsafe fn test_mm256_permute2x128_si256() {
5377	let a = _mm256_setr_epi64x(`100`, `200`, `500`, `600`);
5378	let b = _mm256_setr_epi64x(`300`, `400`, `700`, `800`);
5379	let r = _mm256_permute2x128_si256::<`0b00_01_00_11`>(a, b);
5380	let e = _mm256_setr_epi64x(`700`, `800`, `500`, `600`);
5381	assert_eq_m256i(r, e);
5382	}
5383
5384	#[simd_test(enable = "avx2")]
5385	unsafe fn test_mm256_permute4x64_pd() {
5386	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
5387	let r = _mm256_permute4x64_pd::<`0b00_01_00_11`>(a);
5388	let e = _mm256_setr_pd(`4.`, `1.`, `2.`, `1.`);
5389	assert_eq_m256d(r, e);
5390	}
5391
5392	#[simd_test(enable = "avx2")]
5393	unsafe fn test_mm256_permutevar8x32_ps() {
5394	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
5395	let b = _mm256_setr_epi32(`5`, `0`, `5`, `1`, `7`, `6`, `3`, `4`);
5396	let r = _mm256_permutevar8x32_ps(a, b);
5397	let e = _mm256_setr_ps(`6.`, `1.`, `6.`, `2.`, `8.`, `7.`, `4.`, `5.`);
5398	assert_eq_m256(r, e);
5399	}
5400
5401	#[simd_test(enable = "avx2")]
5402	unsafe fn test_mm_i32gather_epi32() {
5403	let arr: [i32; `128`] = core::array::from_fn(\|i\| i as i32);
5404	// A multiplier of 4 is word-addressing
5405	let r = _mm_i32gather_epi32::<`4`>(arr.as_ptr(), _mm_setr_epi32(`0`, `16`, `32`, `48`));
5406	assert_eq_m128i(r, _mm_setr_epi32(`0`, `16`, `32`, `48`));
5407	}
5408
5409	#[simd_test(enable = "avx2")]
5410	unsafe fn test_mm_mask_i32gather_epi32() {
5411	let arr: [i32; `128`] = core::array::from_fn(\|i\| i as i32);
5412	// A multiplier of 4 is word-addressing
5413	let r = _mm_mask_i32gather_epi32::<`4`>(
5414	_mm_set1_epi32(`256`),
5415	arr.as_ptr(),
5416	_mm_setr_epi32(`0`, `16`, `64`, `96`),
5417	_mm_setr_epi32(`-1`, `-1`, `-1`, `0`),
5418	);
5419	assert_eq_m128i(r, _mm_setr_epi32(`0`, `16`, `64`, `256`));
5420	}
5421
5422	#[simd_test(enable = "avx2")]
5423	unsafe fn test_mm256_i32gather_epi32() {
5424	let arr: [i32; `128`] = core::array::from_fn(\|i\| i as i32);
5425	// A multiplier of 4 is word-addressing
5426	let r =
5427	_mm256_i32gather_epi32::<`4`>(arr.as_ptr(), _mm256_setr_epi32(`0`, `16`, `32`, `48`, `1`, `2`, `3`, `4`));
5428	assert_eq_m256i(r, _mm256_setr_epi32(`0`, `16`, `32`, `48`, `1`, `2`, `3`, `4`));
5429	}
5430
5431	#[simd_test(enable = "avx2")]
5432	unsafe fn test_mm256_mask_i32gather_epi32() {
5433	let arr: [i32; `128`] = core::array::from_fn(\|i\| i as i32);
5434	// A multiplier of 4 is word-addressing
5435	let r = _mm256_mask_i32gather_epi32::<`4`>(
5436	_mm256_set1_epi32(`256`),
5437	arr.as_ptr(),
5438	_mm256_setr_epi32(`0`, `16`, `64`, `96`, `0`, `0`, `0`, `0`),
5439	_mm256_setr_epi32(`-1`, `-1`, `-1`, `0`, `0`, `0`, `0`, `0`),
5440	);
5441	assert_eq_m256i(r, _mm256_setr_epi32(`0`, `16`, `64`, `256`, `256`, `256`, `256`, `256`));
5442	}
5443
5444	#[simd_test(enable = "avx2")]
5445	unsafe fn test_mm_i32gather_ps() {
5446	let arr: [f32; `128`] = core::array::from_fn(\|i\| i as f32);
5447	// A multiplier of 4 is word-addressing for f32s
5448	let r = _mm_i32gather_ps::<`4`>(arr.as_ptr(), _mm_setr_epi32(`0`, `16`, `32`, `48`));
5449	assert_eq_m128(r, _mm_setr_ps(`0.0`, `16.0`, `32.0`, `48.0`));
5450	}
5451
5452	#[simd_test(enable = "avx2")]
5453	unsafe fn test_mm_mask_i32gather_ps() {
5454	let arr: [f32; `128`] = core::array::from_fn(\|i\| i as f32);
5455	// A multiplier of 4 is word-addressing for f32s
5456	let r = _mm_mask_i32gather_ps::<`4`>(
5457	_mm_set1_ps(`256.0`),
5458	arr.as_ptr(),
5459	_mm_setr_epi32(`0`, `16`, `64`, `96`),
5460	_mm_setr_ps(`-1.0`, `-1.0`, `-1.0`, `0.0`),
5461	);
5462	assert_eq_m128(r, _mm_setr_ps(`0.0`, `16.0`, `64.0`, `256.0`));
5463	}
5464
5465	#[simd_test(enable = "avx2")]
5466	unsafe fn test_mm256_i32gather_ps() {
5467	let arr: [f32; `128`] = core::array::from_fn(\|i\| i as f32);
5468	// A multiplier of 4 is word-addressing for f32s
5469	let r =
5470	_mm256_i32gather_ps::<`4`>(arr.as_ptr(), _mm256_setr_epi32(`0`, `16`, `32`, `48`, `1`, `2`, `3`, `4`));
5471	assert_eq_m256(r, _mm256_setr_ps(`0.0`, `16.0`, `32.0`, `48.0`, `1.0`, `2.0`, `3.0`, `4.0`));
5472	}
5473
5474	#[simd_test(enable = "avx2")]
5475	unsafe fn test_mm256_mask_i32gather_ps() {
5476	let arr: [f32; `128`] = core::array::from_fn(\|i\| i as f32);
5477	// A multiplier of 4 is word-addressing for f32s
5478	let r = _mm256_mask_i32gather_ps::<`4`>(
5479	_mm256_set1_ps(`256.0`),
5480	arr.as_ptr(),
5481	_mm256_setr_epi32(`0`, `16`, `64`, `96`, `0`, `0`, `0`, `0`),
5482	_mm256_setr_ps(`-1.0`, `-1.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`),
5483	);
5484	assert_eq_m256(
5485	r,
5486	_mm256_setr_ps(`0.0`, `16.0`, `64.0`, `256.0`, `256.0`, `256.0`, `256.0`, `256.0`),
5487	);
5488	}
5489
5490	#[simd_test(enable = "avx2")]
5491	unsafe fn test_mm_i32gather_epi64() {
5492	let arr: [i64; `128`] = core::array::from_fn(\|i\| i as i64);
5493	// A multiplier of 8 is word-addressing for i64s
5494	let r = _mm_i32gather_epi64::<`8`>(arr.as_ptr(), _mm_setr_epi32(`0`, `16`, `0`, `0`));
5495	assert_eq_m128i(r, _mm_setr_epi64x(`0`, `16`));
5496	}
5497
5498	#[simd_test(enable = "avx2")]
5499	unsafe fn test_mm_mask_i32gather_epi64() {
5500	let arr: [i64; `128`] = core::array::from_fn(\|i\| i as i64);
5501	// A multiplier of 8 is word-addressing for i64s
5502	let r = _mm_mask_i32gather_epi64::<`8`>(
5503	_mm_set1_epi64x(`256`),
5504	arr.as_ptr(),
5505	_mm_setr_epi32(`16`, `16`, `16`, `16`),
5506	_mm_setr_epi64x(`-1`, `0`),
5507	);
5508	assert_eq_m128i(r, _mm_setr_epi64x(`16`, `256`));
5509	}
5510
5511	#[simd_test(enable = "avx2")]
5512	unsafe fn test_mm256_i32gather_epi64() {
5513	let arr: [i64; `128`] = core::array::from_fn(\|i\| i as i64);
5514	// A multiplier of 8 is word-addressing for i64s
5515	let r = _mm256_i32gather_epi64::<`8`>(arr.as_ptr(), _mm_setr_epi32(`0`, `16`, `32`, `48`));
5516	assert_eq_m256i(r, _mm256_setr_epi64x(`0`, `16`, `32`, `48`));
5517	}
5518
5519	#[simd_test(enable = "avx2")]
5520	unsafe fn test_mm256_mask_i32gather_epi64() {
5521	let arr: [i64; `128`] = core::array::from_fn(\|i\| i as i64);
5522	// A multiplier of 8 is word-addressing for i64s
5523	let r = _mm256_mask_i32gather_epi64::<`8`>(
5524	_mm256_set1_epi64x(`256`),
5525	arr.as_ptr(),
5526	_mm_setr_epi32(`0`, `16`, `64`, `96`),
5527	_mm256_setr_epi64x(`-1`, `-1`, `-1`, `0`),
5528	);
5529	assert_eq_m256i(r, _mm256_setr_epi64x(`0`, `16`, `64`, `256`));
5530	}
5531
5532	#[simd_test(enable = "avx2")]
5533	unsafe fn test_mm_i32gather_pd() {
5534	let arr: [f64; `128`] = core::array::from_fn(\|i\| i as f64);
5535	// A multiplier of 8 is word-addressing for f64s
5536	let r = _mm_i32gather_pd::<`8`>(arr.as_ptr(), _mm_setr_epi32(`0`, `16`, `0`, `0`));
5537	assert_eq_m128d(r, _mm_setr_pd(`0.0`, `16.0`));
5538	}
5539
5540	#[simd_test(enable = "avx2")]
5541	unsafe fn test_mm_mask_i32gather_pd() {
5542	let arr: [f64; `128`] = core::array::from_fn(\|i\| i as f64);
5543	// A multiplier of 8 is word-addressing for f64s
5544	let r = _mm_mask_i32gather_pd::<`8`>(
5545	_mm_set1_pd(`256.0`),
5546	arr.as_ptr(),
5547	_mm_setr_epi32(`16`, `16`, `16`, `16`),
5548	_mm_setr_pd(`-1.0`, `0.0`),
5549	);
5550	assert_eq_m128d(r, _mm_setr_pd(`16.0`, `256.0`));
5551	}
5552
5553	#[simd_test(enable = "avx2")]
5554	unsafe fn test_mm256_i32gather_pd() {
5555	let arr: [f64; `128`] = core::array::from_fn(\|i\| i as f64);
5556	// A multiplier of 8 is word-addressing for f64s
5557	let r = _mm256_i32gather_pd::<`8`>(arr.as_ptr(), _mm_setr_epi32(`0`, `16`, `32`, `48`));
5558	assert_eq_m256d(r, _mm256_setr_pd(`0.0`, `16.0`, `32.0`, `48.0`));
5559	}
5560
5561	#[simd_test(enable = "avx2")]
5562	unsafe fn test_mm256_mask_i32gather_pd() {
5563	let arr: [f64; `128`] = core::array::from_fn(\|i\| i as f64);
5564	// A multiplier of 8 is word-addressing for f64s
5565	let r = _mm256_mask_i32gather_pd::<`8`>(
5566	_mm256_set1_pd(`256.0`),
5567	arr.as_ptr(),
5568	_mm_setr_epi32(`0`, `16`, `64`, `96`),
5569	_mm256_setr_pd(`-1.0`, `-1.0`, `-1.0`, `0.0`),
5570	);
5571	assert_eq_m256d(r, _mm256_setr_pd(`0.0`, `16.0`, `64.0`, `256.0`));
5572	}
5573
5574	#[simd_test(enable = "avx2")]
5575	unsafe fn test_mm_i64gather_epi32() {
5576	let arr: [i32; `128`] = core::array::from_fn(\|i\| i as i32);
5577	// A multiplier of 4 is word-addressing
5578	let r = _mm_i64gather_epi32::<`4`>(arr.as_ptr(), _mm_setr_epi64x(`0`, `16`));
5579	assert_eq_m128i(r, _mm_setr_epi32(`0`, `16`, `0`, `0`));
5580	}
5581
5582	#[simd_test(enable = "avx2")]
5583	unsafe fn test_mm_mask_i64gather_epi32() {
5584	let arr: [i32; `128`] = core::array::from_fn(\|i\| i as i32);
5585	// A multiplier of 4 is word-addressing
5586	let r = _mm_mask_i64gather_epi32::<`4`>(
5587	_mm_set1_epi32(`256`),
5588	arr.as_ptr(),
5589	_mm_setr_epi64x(`0`, `16`),
5590	_mm_setr_epi32(`-1`, `0`, `-1`, `0`),
5591	);
5592	assert_eq_m128i(r, _mm_setr_epi32(`0`, `256`, `0`, `0`));
5593	}
5594
5595	#[simd_test(enable = "avx2")]
5596	unsafe fn test_mm256_i64gather_epi32() {
5597	let arr: [i32; `128`] = core::array::from_fn(\|i\| i as i32);
5598	// A multiplier of 4 is word-addressing
5599	let r = _mm256_i64gather_epi32::<`4`>(arr.as_ptr(), _mm256_setr_epi64x(`0`, `16`, `32`, `48`));
5600	assert_eq_m128i(r, _mm_setr_epi32(`0`, `16`, `32`, `48`));
5601	}
5602
5603	#[simd_test(enable = "avx2")]
5604	unsafe fn test_mm256_mask_i64gather_epi32() {
5605	let arr: [i32; `128`] = core::array::from_fn(\|i\| i as i32);
5606	// A multiplier of 4 is word-addressing
5607	let r = _mm256_mask_i64gather_epi32::<`4`>(
5608	_mm_set1_epi32(`256`),
5609	arr.as_ptr(),
5610	_mm256_setr_epi64x(`0`, `16`, `64`, `96`),
5611	_mm_setr_epi32(`-1`, `-1`, `-1`, `0`),
5612	);
5613	assert_eq_m128i(r, _mm_setr_epi32(`0`, `16`, `64`, `256`));
5614	}
5615
5616	#[simd_test(enable = "avx2")]
5617	unsafe fn test_mm_i64gather_ps() {
5618	let arr: [f32; `128`] = core::array::from_fn(\|i\| i as f32);
5619	// A multiplier of 4 is word-addressing for f32s
5620	let r = _mm_i64gather_ps::<`4`>(arr.as_ptr(), _mm_setr_epi64x(`0`, `16`));
5621	assert_eq_m128(r, _mm_setr_ps(`0.0`, `16.0`, `0.0`, `0.0`));
5622	}
5623
5624	#[simd_test(enable = "avx2")]
5625	unsafe fn test_mm_mask_i64gather_ps() {
5626	let arr: [f32; `128`] = core::array::from_fn(\|i\| i as f32);
5627	// A multiplier of 4 is word-addressing for f32s
5628	let r = _mm_mask_i64gather_ps::<`4`>(
5629	_mm_set1_ps(`256.0`),
5630	arr.as_ptr(),
5631	_mm_setr_epi64x(`0`, `16`),
5632	_mm_setr_ps(`-1.0`, `0.0`, `-1.0`, `0.0`),
5633	);
5634	assert_eq_m128(r, _mm_setr_ps(`0.0`, `256.0`, `0.0`, `0.0`));
5635	}
5636
5637	#[simd_test(enable = "avx2")]
5638	unsafe fn test_mm256_i64gather_ps() {
5639	let arr: [f32; `128`] = core::array::from_fn(\|i\| i as f32);
5640	// A multiplier of 4 is word-addressing for f32s
5641	let r = _mm256_i64gather_ps::<`4`>(arr.as_ptr(), _mm256_setr_epi64x(`0`, `16`, `32`, `48`));
5642	assert_eq_m128(r, _mm_setr_ps(`0.0`, `16.0`, `32.0`, `48.0`));
5643	}
5644
5645	#[simd_test(enable = "avx2")]
5646	unsafe fn test_mm256_mask_i64gather_ps() {
5647	let arr: [f32; `128`] = core::array::from_fn(\|i\| i as f32);
5648	// A multiplier of 4 is word-addressing for f32s
5649	let r = _mm256_mask_i64gather_ps::<`4`>(
5650	_mm_set1_ps(`256.0`),
5651	arr.as_ptr(),
5652	_mm256_setr_epi64x(`0`, `16`, `64`, `96`),
5653	_mm_setr_ps(`-1.0`, `-1.0`, `-1.0`, `0.0`),
5654	);
5655	assert_eq_m128(r, _mm_setr_ps(`0.0`, `16.0`, `64.0`, `256.0`));
5656	}
5657
5658	#[simd_test(enable = "avx2")]
5659	unsafe fn test_mm_i64gather_epi64() {
5660	let arr: [i64; `128`] = core::array::from_fn(\|i\| i as i64);
5661	// A multiplier of 8 is word-addressing for i64s
5662	let r = _mm_i64gather_epi64::<`8`>(arr.as_ptr(), _mm_setr_epi64x(`0`, `16`));
5663	assert_eq_m128i(r, _mm_setr_epi64x(`0`, `16`));
5664	}
5665
5666	#[simd_test(enable = "avx2")]
5667	unsafe fn test_mm_mask_i64gather_epi64() {
5668	let arr: [i64; `128`] = core::array::from_fn(\|i\| i as i64);
5669	// A multiplier of 8 is word-addressing for i64s
5670	let r = _mm_mask_i64gather_epi64::<`8`>(
5671	_mm_set1_epi64x(`256`),
5672	arr.as_ptr(),
5673	_mm_setr_epi64x(`16`, `16`),
5674	_mm_setr_epi64x(`-1`, `0`),
5675	);
5676	assert_eq_m128i(r, _mm_setr_epi64x(`16`, `256`));
5677	}
5678
5679	#[simd_test(enable = "avx2")]
5680	unsafe fn test_mm256_i64gather_epi64() {
5681	let arr: [i64; `128`] = core::array::from_fn(\|i\| i as i64);
5682	// A multiplier of 8 is word-addressing for i64s
5683	let r = _mm256_i64gather_epi64::<`8`>(arr.as_ptr(), _mm256_setr_epi64x(`0`, `16`, `32`, `48`));
5684	assert_eq_m256i(r, _mm256_setr_epi64x(`0`, `16`, `32`, `48`));
5685	}
5686
5687	#[simd_test(enable = "avx2")]
5688	unsafe fn test_mm256_mask_i64gather_epi64() {
5689	let arr: [i64; `128`] = core::array::from_fn(\|i\| i as i64);
5690	// A multiplier of 8 is word-addressing for i64s
5691	let r = _mm256_mask_i64gather_epi64::<`8`>(
5692	_mm256_set1_epi64x(`256`),
5693	arr.as_ptr(),
5694	_mm256_setr_epi64x(`0`, `16`, `64`, `96`),
5695	_mm256_setr_epi64x(`-1`, `-1`, `-1`, `0`),
5696	);
5697	assert_eq_m256i(r, _mm256_setr_epi64x(`0`, `16`, `64`, `256`));
5698	}
5699
5700	#[simd_test(enable = "avx2")]
5701	unsafe fn test_mm_i64gather_pd() {
5702	let arr: [f64; `128`] = core::array::from_fn(\|i\| i as f64);
5703	// A multiplier of 8 is word-addressing for f64s
5704	let r = _mm_i64gather_pd::<`8`>(arr.as_ptr(), _mm_setr_epi64x(`0`, `16`));
5705	assert_eq_m128d(r, _mm_setr_pd(`0.0`, `16.0`));
5706	}
5707
5708	#[simd_test(enable = "avx2")]
5709	unsafe fn test_mm_mask_i64gather_pd() {
5710	let arr: [f64; `128`] = core::array::from_fn(\|i\| i as f64);
5711	// A multiplier of 8 is word-addressing for f64s
5712	let r = _mm_mask_i64gather_pd::<`8`>(
5713	_mm_set1_pd(`256.0`),
5714	arr.as_ptr(),
5715	_mm_setr_epi64x(`16`, `16`),
5716	_mm_setr_pd(`-1.0`, `0.0`),
5717	);
5718	assert_eq_m128d(r, _mm_setr_pd(`16.0`, `256.0`));
5719	}
5720
5721	#[simd_test(enable = "avx2")]
5722	unsafe fn test_mm256_i64gather_pd() {
5723	let arr: [f64; `128`] = core::array::from_fn(\|i\| i as f64);
5724	// A multiplier of 8 is word-addressing for f64s
5725	let r = _mm256_i64gather_pd::<`8`>(arr.as_ptr(), _mm256_setr_epi64x(`0`, `16`, `32`, `48`));
5726	assert_eq_m256d(r, _mm256_setr_pd(`0.0`, `16.0`, `32.0`, `48.0`));
5727	}
5728
5729	#[simd_test(enable = "avx2")]
5730	unsafe fn test_mm256_mask_i64gather_pd() {
5731	let arr: [f64; `128`] = core::array::from_fn(\|i\| i as f64);
5732	// A multiplier of 8 is word-addressing for f64s
5733	let r = _mm256_mask_i64gather_pd::<`8`>(
5734	_mm256_set1_pd(`256.0`),
5735	arr.as_ptr(),
5736	_mm256_setr_epi64x(`0`, `16`, `64`, `96`),
5737	_mm256_setr_pd(`-1.0`, `-1.0`, `-1.0`, `0.0`),
5738	);
5739	assert_eq_m256d(r, _mm256_setr_pd(`0.0`, `16.0`, `64.0`, `256.0`));
5740	}
5741
5742	#[simd_test(enable = "avx")]
5743	unsafe fn test_mm256_extract_epi8() {
5744	#[rustfmt::skip]
5745	let a = _mm256_setr_epi8(
5746	`-1`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
5747	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
5748	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`,
5749	`24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`
5750	);
5751	let r1 = _mm256_extract_epi8::<`0`>(a);
5752	let r2 = _mm256_extract_epi8::<`3`>(a);
5753	assert_eq!(r1, `0xFF`);
5754	assert_eq!(r2, `3`);
5755	}
5756
5757	#[simd_test(enable = "avx2")]
5758	unsafe fn test_mm256_extract_epi16() {
5759	#[rustfmt::skip]
5760	let a = _mm256_setr_epi16(
5761	`-1`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
5762	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
5763	);
5764	let r1 = _mm256_extract_epi16::<`0`>(a);
5765	let r2 = _mm256_extract_epi16::<`3`>(a);
5766	assert_eq!(r1, `0xFFFF`);
5767	assert_eq!(r2, `3`);
5768	}
5769
5770	#[simd_test(enable = "avx2")]
5771	unsafe fn test_mm256_extract_epi32() {
5772	let a = _mm256_setr_epi32(`-1`, `1`, `2`, `3`, `4`, `5`, `6`, `7`);
5773	let r1 = _mm256_extract_epi32::<`0`>(a);
5774	let r2 = _mm256_extract_epi32::<`3`>(a);
5775	assert_eq!(r1, `-1`);
5776	assert_eq!(r2, `3`);
5777	}
5778
5779	#[simd_test(enable = "avx2")]
5780	unsafe fn test_mm256_cvtsd_f64() {
5781	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
5782	let r = _mm256_cvtsd_f64(a);
5783	assert_eq!(r, `1.`);
5784	}
5785
5786	#[simd_test(enable = "avx2")]
5787	unsafe fn test_mm256_cvtsi256_si32() {
5788	let a = _mm256_setr_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
5789	let r = _mm256_cvtsi256_si32(a);
5790	assert_eq!(r, `1`);
5791	}
5792	}
5793