avx2.rs source code [crates/core_arch/src/x86/avx2.rs]

1	//! Advanced Vector Extensions 2 (AVX2)
2	//!
3	//! AVX2 expands most AVX commands to 256-bit wide vector registers and
4	//! adds [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate).
5	//!
6	//! The references are:
7	//!
8	//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
9	//! Instruction Set Reference, A-Z][intel64_ref].
10	//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
11	//! System Instructions][amd64_ref].
12	//!
13	//! Wikipedia's [AVX][wiki_avx] and [FMA][wiki_fma] pages provide a quick
14	//! overview of the instructions available.
15	//!
16	//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
17	//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
18	//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
19	//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
20
21	use crate::{
22	core_arch::{simd::, simd_llvm::, x86::*},
23	mem::transmute,
24	};
25
26	#[cfg(test)]
27	use stdarch_test::assert_instr;
28
29	/// Computes the absolute values of packed 32-bit integers in `a`.
30	///
31	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi32)
32	#[inline]
33	#[target_feature(enable = "avx2")]
34	#[cfg_attr(test, assert_instr(vpabsd))]
35	#[stable(feature = "simd_x86", since = "1.27.0")]
36	pub unsafe fn _mm256_abs_epi32(a: __m256i) -> __m256i {
37	transmute(src:pabsd(a.as_i32x8()))
38	}
39
40	/// Computes the absolute values of packed 16-bit integers in `a`.
41	///
42	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi16)
43	#[inline]
44	#[target_feature(enable = "avx2")]
45	#[cfg_attr(test, assert_instr(vpabsw))]
46	#[stable(feature = "simd_x86", since = "1.27.0")]
47	pub unsafe fn _mm256_abs_epi16(a: __m256i) -> __m256i {
48	transmute(src:pabsw(a.as_i16x16()))
49	}
50
51	/// Computes the absolute values of packed 8-bit integers in `a`.
52	///
53	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi8)
54	#[inline]
55	#[target_feature(enable = "avx2")]
56	#[cfg_attr(test, assert_instr(vpabsb))]
57	#[stable(feature = "simd_x86", since = "1.27.0")]
58	pub unsafe fn _mm256_abs_epi8(a: __m256i) -> __m256i {
59	transmute(src:pabsb(a.as_i8x32()))
60	}
61
62	/// Adds packed 64-bit integers in `a` and `b`.
63	///
64	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi64)
65	#[inline]
66	#[target_feature(enable = "avx2")]
67	#[cfg_attr(test, assert_instr(vpaddq))]
68	#[stable(feature = "simd_x86", since = "1.27.0")]
69	pub unsafe fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
70	transmute(src:simd_add(x:a.as_i64x4(), y:b.as_i64x4()))
71	}
72
73	/// Adds packed 32-bit integers in `a` and `b`.
74	///
75	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi32)
76	#[inline]
77	#[target_feature(enable = "avx2")]
78	#[cfg_attr(test, assert_instr(vpaddd))]
79	#[stable(feature = "simd_x86", since = "1.27.0")]
80	pub unsafe fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
81	transmute(src:simd_add(x:a.as_i32x8(), y:b.as_i32x8()))
82	}
83
84	/// Adds packed 16-bit integers in `a` and `b`.
85	///
86	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi16)
87	#[inline]
88	#[target_feature(enable = "avx2")]
89	#[cfg_attr(test, assert_instr(vpaddw))]
90	#[stable(feature = "simd_x86", since = "1.27.0")]
91	pub unsafe fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
92	transmute(src:simd_add(x:a.as_i16x16(), y:b.as_i16x16()))
93	}
94
95	/// Adds packed 8-bit integers in `a` and `b`.
96	///
97	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi8)
98	#[inline]
99	#[target_feature(enable = "avx2")]
100	#[cfg_attr(test, assert_instr(vpaddb))]
101	#[stable(feature = "simd_x86", since = "1.27.0")]
102	pub unsafe fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
103	transmute(src:simd_add(x:a.as_i8x32(), y:b.as_i8x32()))
104	}
105
106	/// Adds packed 8-bit integers in `a` and `b` using saturation.
107	///
108	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epi8)
109	#[inline]
110	#[target_feature(enable = "avx2")]
111	#[cfg_attr(test, assert_instr(vpaddsb))]
112	#[stable(feature = "simd_x86", since = "1.27.0")]
113	pub unsafe fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
114	transmute(src:simd_saturating_add(x:a.as_i8x32(), y:b.as_i8x32()))
115	}
116
117	/// Adds packed 16-bit integers in `a` and `b` using saturation.
118	///
119	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epi16)
120	#[inline]
121	#[target_feature(enable = "avx2")]
122	#[cfg_attr(test, assert_instr(vpaddsw))]
123	#[stable(feature = "simd_x86", since = "1.27.0")]
124	pub unsafe fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
125	transmute(src:simd_saturating_add(x:a.as_i16x16(), y:b.as_i16x16()))
126	}
127
128	/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
129	///
130	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epu8)
131	#[inline]
132	#[target_feature(enable = "avx2")]
133	#[cfg_attr(test, assert_instr(vpaddusb))]
134	#[stable(feature = "simd_x86", since = "1.27.0")]
135	pub unsafe fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
136	transmute(src:simd_saturating_add(x:a.as_u8x32(), y:b.as_u8x32()))
137	}
138
139	/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
140	///
141	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epu16)
142	#[inline]
143	#[target_feature(enable = "avx2")]
144	#[cfg_attr(test, assert_instr(vpaddusw))]
145	#[stable(feature = "simd_x86", since = "1.27.0")]
146	pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
147	transmute(src:simd_saturating_add(x:a.as_u16x16(), y:b.as_u16x16()))
148	}
149
150	/// Concatenates pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary
151	/// result, shifts the result right by `n` bytes, and returns the low 16 bytes.
152	///
153	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi8)
154	#[inline]
155	#[target_feature(enable = "avx2")]
156	#[cfg_attr(test, assert_instr(vpalignr, IMM8 = `7`))]
157	#[rustc_legacy_const_generics(`2`)]
158	#[stable(feature = "simd_x86", since = "1.27.0")]
159	pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
160	static_assert_uimm_bits!(IMM8, `8`);
161	// If palignr is shifting the pair of vectors more than the size of two
162	// lanes, emit zero.
163	if IMM8 > `32` {
164	return _mm256_set1_epi8(`0`);
165	}
166	// If palignr is shifting the pair of input vectors more than one lane,
167	// but less than two lanes, convert to shifting in zeroes.
168	let (a, b) = if IMM8 > `16` {
169	(_mm256_set1_epi8(`0`), a)
170	} else {
171	(a, b)
172	};
173
174	let a = a.as_i8x32();
175	let b = b.as_i8x32();
176
177	let r: i8x32 = match IMM8 % `16` {
178	`0` => simd_shuffle!(
179	b,
180	a,
181	[
182	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`,
183	`23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
184	],
185	),
186	`1` => simd_shuffle!(
187	b,
188	a,
189	[
190	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `17`, `18`, `19`, `20`, `21`, `22`, `23`,
191	`24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`, `48`,
192	],
193	),
194	`2` => simd_shuffle!(
195	b,
196	a,
197	[
198	`2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `33`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
199	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `48`, `49`,
200	],
201	),
202	`3` => simd_shuffle!(
203	b,
204	a,
205	[
206	`3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `33`, `34`, `19`, `20`, `21`, `22`, `23`, `24`,
207	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `48`, `49`, `50`,
208	],
209	),
210	`4` => simd_shuffle!(
211	b,
212	a,
213	[
214	`4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `33`, `34`, `35`, `20`, `21`, `22`, `23`, `24`, `25`,
215	`26`, `27`, `28`, `29`, `30`, `31`, `48`, `49`, `50`, `51`,
216	],
217	),
218	`5` => simd_shuffle!(
219	b,
220	a,
221	[
222	`5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `33`, `34`, `35`, `36`, `21`, `22`, `23`, `24`, `25`, `26`,
223	`27`, `28`, `29`, `30`, `31`, `48`, `49`, `50`, `51`, `52`,
224	],
225	),
226	`6` => simd_shuffle!(
227	b,
228	a,
229	[
230	`6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `33`, `34`, `35`, `36`, `37`, `22`, `23`, `24`, `25`, `26`, `27`,
231	`28`, `29`, `30`, `31`, `48`, `49`, `50`, `51`, `52`, `53`,
232	],
233	),
234	`7` => simd_shuffle!(
235	b,
236	a,
237	[
238	`7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `33`, `34`, `35`, `36`, `37`, `38`, `23`, `24`, `25`, `26`, `27`,
239	`28`, `29`, `30`, `31`, `48`, `49`, `50`, `51`, `52`, `53`, `54`,
240	],
241	),
242	`8` => simd_shuffle!(
243	b,
244	a,
245	[
246	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `24`, `25`, `26`, `27`, `28`,
247	`29`, `30`, `31`, `48`, `49`, `50`, `51`, `52`, `53`, `54`, `55`,
248	],
249	),
250	`9` => simd_shuffle!(
251	b,
252	a,
253	[
254	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `25`, `26`, `27`, `28`, `29`,
255	`30`, `31`, `48`, `49`, `50`, `51`, `52`, `53`, `54`, `55`, `56`,
256	],
257	),
258	`10` => simd_shuffle!(
259	b,
260	a,
261	[
262	`10`, `11`, `12`, `13`, `14`, `15`, `32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`, `26`, `27`, `28`, `29`, `30`,
263	`31`, `48`, `49`, `50`, `51`, `52`, `53`, `54`, `55`, `56`, `57`,
264	],
265	),
266	`11` => simd_shuffle!(
267	b,
268	a,
269	[
270	`11`, `12`, `13`, `14`, `15`, `32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`, `42`, `27`, `28`, `29`, `30`, `31`,
271	`48`, `49`, `50`, `51`, `52`, `53`, `54`, `55`, `56`, `57`, `58`,
272	],
273	),
274	`12` => simd_shuffle!(
275	b,
276	a,
277	[
278	`12`, `13`, `14`, `15`, `32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`, `42`, `43`, `28`, `29`, `30`, `31`, `48`,
279	`49`, `50`, `51`, `52`, `53`, `54`, `55`, `56`, `57`, `58`, `59`,
280	],
281	),
282	`13` => simd_shuffle!(
283	b,
284	a,
285	[
286	`13`, `14`, `15`, `32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`, `42`, `43`, `44`, `29`, `30`, `31`, `48`, `49`,
287	`50`, `51`, `52`, `53`, `54`, `55`, `56`, `57`, `58`, `59`, `60`,
288	],
289	),
290	`14` => simd_shuffle!(
291	b,
292	a,
293	[
294	`14`, `15`, `32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`, `42`, `43`, `44`, `45`, `30`, `31`, `48`, `49`, `50`,
295	`51`, `52`, `53`, `54`, `55`, `56`, `57`, `58`, `59`, `60`, `61`,
296	],
297	),
298	`15` => simd_shuffle!(
299	b,
300	a,
301	[
302	`15`, `32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`, `42`, `43`, `44`, `45`, `46`, `31`, `48`, `49`, `50`, `51`,
303	`52`, `53`, `54`, `55`, `56`, `57`, `58`, `59`, `60`, `61`, `62`,
304	],
305	),
306	_ => b,
307	};
308	transmute(r)
309	}
310
311	/// Computes the bitwise AND of 256 bits (representing integer data)
312	/// in `a` and `b`.
313	///
314	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_si256)
315	#[inline]
316	#[target_feature(enable = "avx2")]
317	#[cfg_attr(test, assert_instr(vandps))]
318	#[stable(feature = "simd_x86", since = "1.27.0")]
319	pub unsafe fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
320	transmute(src:simd_and(x:a.as_i64x4(), y:b.as_i64x4()))
321	}
322
323	/// Computes the bitwise NOT of 256 bits (representing integer data)
324	/// in `a` and then AND with `b`.
325	///
326	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_si256)
327	#[inline]
328	#[target_feature(enable = "avx2")]
329	#[cfg_attr(test, assert_instr(vandnps))]
330	#[stable(feature = "simd_x86", since = "1.27.0")]
331	pub unsafe fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
332	let all_ones: __m256i = _mm256_set1_epi8(`-1`);
333	transmute(src:simd_and(
334	x:simd_xor(a.as_i64x4(), all_ones.as_i64x4()),
335	y:b.as_i64x4(),
336	))
337	}
338
339	/// Averages packed unsigned 16-bit integers in `a` and `b`.
340	///
341	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_avg_epu16)
342	#[inline]
343	#[target_feature(enable = "avx2")]
344	#[cfg_attr(test, assert_instr(vpavgw))]
345	#[stable(feature = "simd_x86", since = "1.27.0")]
346	pub unsafe fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
347	let a: u32x16 = simd_cast::<_, u32x16>(a.as_u16x16());
348	let b: u32x16 = simd_cast::<_, u32x16>(b.as_u16x16());
349	let r: u32x16 = simd_shr(x:simd_add(simd_add(a, b), u32x16::splat(`1`)), y:u32x16::splat(`1`));
350	transmute(src:simd_cast::<_, u16x16>(r))
351	}
352
353	/// Averages packed unsigned 8-bit integers in `a` and `b`.
354	///
355	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_avg_epu8)
356	#[inline]
357	#[target_feature(enable = "avx2")]
358	#[cfg_attr(test, assert_instr(vpavgb))]
359	#[stable(feature = "simd_x86", since = "1.27.0")]
360	pub unsafe fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
361	let a: u16x32 = simd_cast::<_, u16x32>(a.as_u8x32());
362	let b: u16x32 = simd_cast::<_, u16x32>(b.as_u8x32());
363	let r: u16x32 = simd_shr(x:simd_add(simd_add(a, b), u16x32::splat(`1`)), y:u16x32::splat(`1`));
364	transmute(src:simd_cast::<_, u8x32>(r))
365	}
366
367	/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM4`.
368	///
369	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_epi32)
370	#[inline]
371	#[target_feature(enable = "avx2")]
372	#[cfg_attr(test, assert_instr(vblendps, IMM4 = `9`))]
373	#[rustc_legacy_const_generics(`2`)]
374	#[stable(feature = "simd_x86", since = "1.27.0")]
375	pub unsafe fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
376	static_assert_uimm_bits!(IMM4, `4`);
377	let a: i32x4 = a.as_i32x4();
378	let b: i32x4 = b.as_i32x4();
379	let r: i32x4 = simd_shuffle!(
380	a,
381	b,
382	[
383	[`0`, `4`, `0`, `4`][IMM4 as usize & `0b11`],
384	[`1`, `1`, `5`, `5`][IMM4 as usize & `0b11`],
385	[`2`, `6`, `2`, `6`][(IMM4 as usize >> `2`) & `0b11`],
386	[`3`, `3`, `7`, `7`][(IMM4 as usize >> `2`) & `0b11`],
387	],
388	);
389	transmute(src:r)
390	}
391
392	/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM8`.
393	///
394	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_epi32)
395	#[inline]
396	#[target_feature(enable = "avx2")]
397	#[cfg_attr(test, assert_instr(vblendps, IMM8 = `9`))]
398	#[rustc_legacy_const_generics(`2`)]
399	#[stable(feature = "simd_x86", since = "1.27.0")]
400	pub unsafe fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
401	static_assert_uimm_bits!(IMM8, `8`);
402	let a: i32x8 = a.as_i32x8();
403	let b: i32x8 = b.as_i32x8();
404	let r: i32x8 = simd_shuffle!(
405	a,
406	b,
407	[
408	[`0`, `8`, `0`, `8`][IMM8 as usize & `0b11`],
409	[`1`, `1`, `9`, `9`][IMM8 as usize & `0b11`],
410	[`2`, `10`, `2`, `10`][(IMM8 as usize >> `2`) & `0b11`],
411	[`3`, `3`, `11`, `11`][(IMM8 as usize >> `2`) & `0b11`],
412	[`4`, `12`, `4`, `12`][(IMM8 as usize >> `4`) & `0b11`],
413	[`5`, `5`, `13`, `13`][(IMM8 as usize >> `4`) & `0b11`],
414	[`6`, `14`, `6`, `14`][(IMM8 as usize >> `6`) & `0b11`],
415	[`7`, `7`, `15`, `15`][(IMM8 as usize >> `6`) & `0b11`],
416	],
417	);
418	transmute(src:r)
419	}
420
421	/// Blends packed 16-bit integers from `a` and `b` using control mask `IMM8`.
422	///
423	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_epi16)
424	#[inline]
425	#[target_feature(enable = "avx2")]
426	#[cfg_attr(test, assert_instr(vpblendw, IMM8 = `9`))]
427	#[rustc_legacy_const_generics(`2`)]
428	#[stable(feature = "simd_x86", since = "1.27.0")]
429	pub unsafe fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
430	static_assert_uimm_bits!(IMM8, `8`);
431	let a = a.as_i16x16();
432	let b = b.as_i16x16();
433
434	let r: i16x16 = simd_shuffle!(
435	a,
436	b,
437	[
438	[`0`, `16`, `0`, `16`][IMM8 as usize & `0b11`],
439	[`1`, `1`, `17`, `17`][IMM8 as usize & `0b11`],
440	[`2`, `18`, `2`, `18`][(IMM8 as usize >> `2`) & `0b11`],
441	[`3`, `3`, `19`, `19`][(IMM8 as usize >> `2`) & `0b11`],
442	[`4`, `20`, `4`, `20`][(IMM8 as usize >> `4`) & `0b11`],
443	[`5`, `5`, `21`, `21`][(IMM8 as usize >> `4`) & `0b11`],
444	[`6`, `22`, `6`, `22`][(IMM8 as usize >> `6`) & `0b11`],
445	[`7`, `7`, `23`, `23`][(IMM8 as usize >> `6`) & `0b11`],
446	[`8`, `24`, `8`, `24`][IMM8 as usize & `0b11`],
447	[`9`, `9`, `25`, `25`][IMM8 as usize & `0b11`],
448	[`10`, `26`, `10`, `26`][(IMM8 as usize >> `2`) & `0b11`],
449	[`11`, `11`, `27`, `27`][(IMM8 as usize >> `2`) & `0b11`],
450	[`12`, `28`, `12`, `28`][(IMM8 as usize >> `4`) & `0b11`],
451	[`13`, `13`, `29`, `29`][(IMM8 as usize >> `4`) & `0b11`],
452	[`14`, `30`, `14`, `30`][(IMM8 as usize >> `6`) & `0b11`],
453	[`15`, `15`, `31`, `31`][(IMM8 as usize >> `6`) & `0b11`],
454	],
455	);
456	transmute(r)
457	}
458
459	/// Blends packed 8-bit integers from `a` and `b` using `mask`.
460	///
461	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_epi8)
462	#[inline]
463	#[target_feature(enable = "avx2")]
464	#[cfg_attr(test, assert_instr(vpblendvb))]
465	#[stable(feature = "simd_x86", since = "1.27.0")]
466	pub unsafe fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
467	let mask: i8x32 = simd_lt(x:mask.as_i8x32(), y:i8x32::splat(`0`));
468	transmute(src:simd_select(m:mask, a:b.as_i8x32(), b:a.as_i8x32()))
469	}
470
471	/// Broadcasts the low packed 8-bit integer from `a` to all elements of
472	/// the 128-bit returned value.
473	///
474	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastb_epi8)
475	#[inline]
476	#[target_feature(enable = "avx2")]
477	#[cfg_attr(test, assert_instr(vpbroadcastb))]
478	#[stable(feature = "simd_x86", since = "1.27.0")]
479	pub unsafe fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
480	let zero: __m128i = _mm_setzero_si128();
481	let ret: i8x16 = simd_shuffle!(a.as_i8x16(), zero.as_i8x16(), [`0_u32`; `16`]);
482	transmute::<i8x16, _>(src:ret)
483	}
484
485	/// Broadcasts the low packed 8-bit integer from `a` to all elements of
486	/// the 256-bit returned value.
487	///
488	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastb_epi8)
489	#[inline]
490	#[target_feature(enable = "avx2")]
491	#[cfg_attr(test, assert_instr(vpbroadcastb))]
492	#[stable(feature = "simd_x86", since = "1.27.0")]
493	pub unsafe fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
494	let zero: __m128i = _mm_setzero_si128();
495	let ret: i8x32 = simd_shuffle!(a.as_i8x16(), zero.as_i8x16(), [`0_u32`; `32`]);
496	transmute::<i8x32, _>(src:ret)
497	}
498
499	// N.B., `simd_shuffle4` with integer data types for `a` and `b` is
500	// often compiled to `vbroadcastss`.
501	/// Broadcasts the low packed 32-bit integer from `a` to all elements of
502	/// the 128-bit returned value.
503	///
504	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastd_epi32)
505	#[inline]
506	#[target_feature(enable = "avx2")]
507	#[cfg_attr(test, assert_instr(vbroadcastss))]
508	#[stable(feature = "simd_x86", since = "1.27.0")]
509	pub unsafe fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
510	let zero: __m128i = _mm_setzero_si128();
511	let ret: i32x4 = simd_shuffle!(a.as_i32x4(), zero.as_i32x4(), [`0_u32`; `4`]);
512	transmute::<i32x4, _>(src:ret)
513	}
514
515	// N.B., `simd_shuffle4`` with integer data types for `a` and `b` is
516	// often compiled to `vbroadcastss`.
517	/// Broadcasts the low packed 32-bit integer from `a` to all elements of
518	/// the 256-bit returned value.
519	///
520	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastd_epi32)
521	#[inline]
522	#[target_feature(enable = "avx2")]
523	#[cfg_attr(test, assert_instr(vbroadcastss))]
524	#[stable(feature = "simd_x86", since = "1.27.0")]
525	pub unsafe fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
526	let zero: __m128i = _mm_setzero_si128();
527	let ret: i32x8 = simd_shuffle!(a.as_i32x4(), zero.as_i32x4(), [`0_u32`; `8`]);
528	transmute::<i32x8, _>(src:ret)
529	}
530
531	/// Broadcasts the low packed 64-bit integer from `a` to all elements of
532	/// the 128-bit returned value.
533	///
534	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastq_epi64)
535	#[inline]
536	#[target_feature(enable = "avx2")]
537	// Emits `vmovddup` instead of `vpbroadcastq`
538	// See https://github.com/rust-lang/stdarch/issues/791
539	#[cfg_attr(test, assert_instr(vmovddup))]
540	#[stable(feature = "simd_x86", since = "1.27.0")]
541	pub unsafe fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
542	let ret: i64x2 = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [`0_u32`; `2`]);
543	transmute::<i64x2, _>(src:ret)
544	}
545
546	/// Broadcasts the low packed 64-bit integer from `a` to all elements of
547	/// the 256-bit returned value.
548	///
549	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastq_epi64)
550	#[inline]
551	#[target_feature(enable = "avx2")]
552	#[cfg_attr(test, assert_instr(vbroadcastsd))]
553	#[stable(feature = "simd_x86", since = "1.27.0")]
554	pub unsafe fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
555	let ret: i64x4 = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [`0_u32`; `4`]);
556	transmute::<i64x4, _>(src:ret)
557	}
558
559	/// Broadcasts the low double-precision (64-bit) floating-point element
560	/// from `a` to all elements of the 128-bit returned value.
561	///
562	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastsd_pd)
563	#[inline]
564	#[target_feature(enable = "avx2")]
565	#[cfg_attr(test, assert_instr(vmovddup))]
566	#[stable(feature = "simd_x86", since = "1.27.0")]
567	pub unsafe fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
568	simd_shuffle!(a, _mm_setzero_pd(), [`0_u32`; `2`])
569	}
570
571	/// Broadcasts the low double-precision (64-bit) floating-point element
572	/// from `a` to all elements of the 256-bit returned value.
573	///
574	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastsd_pd)
575	#[inline]
576	#[target_feature(enable = "avx2")]
577	#[cfg_attr(test, assert_instr(vbroadcastsd))]
578	#[stable(feature = "simd_x86", since = "1.27.0")]
579	pub unsafe fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
580	simd_shuffle!(a, _mm_setzero_pd(), [`0_u32`; `4`])
581	}
582
583	// N.B., `broadcastsi128_si256` is often compiled to `vinsertf128` or
584	// `vbroadcastf128`.
585	/// Broadcasts 128 bits of integer data from a to all 128-bit lanes in
586	/// the 256-bit returned value.
587	///
588	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastsi128_si256)
589	#[inline]
590	#[target_feature(enable = "avx2")]
591	#[stable(feature = "simd_x86", since = "1.27.0")]
592	pub unsafe fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
593	let zero: __m128i = _mm_setzero_si128();
594	let ret: i64x4 = simd_shuffle!(a.as_i64x2(), zero.as_i64x2(), [`0`, `1`, `0`, `1`]);
595	transmute::<i64x4, _>(src:ret)
596	}
597
598	/// Broadcasts the low single-precision (32-bit) floating-point element
599	/// from `a` to all elements of the 128-bit returned value.
600	///
601	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastss_ps)
602	#[inline]
603	#[target_feature(enable = "avx2")]
604	#[cfg_attr(test, assert_instr(vbroadcastss))]
605	#[stable(feature = "simd_x86", since = "1.27.0")]
606	pub unsafe fn _mm_broadcastss_ps(a: __m128) -> __m128 {
607	simd_shuffle!(a, _mm_setzero_ps(), [`0_u32`; `4`])
608	}
609
610	/// Broadcasts the low single-precision (32-bit) floating-point element
611	/// from `a` to all elements of the 256-bit returned value.
612	///
613	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastss_ps)
614	#[inline]
615	#[target_feature(enable = "avx2")]
616	#[cfg_attr(test, assert_instr(vbroadcastss))]
617	#[stable(feature = "simd_x86", since = "1.27.0")]
618	pub unsafe fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
619	simd_shuffle!(a, _mm_setzero_ps(), [`0_u32`; `8`])
620	}
621
622	/// Broadcasts the low packed 16-bit integer from a to all elements of
623	/// the 128-bit returned value
624	///
625	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastw_epi16)
626	#[inline]
627	#[target_feature(enable = "avx2")]
628	#[cfg_attr(test, assert_instr(vpbroadcastw))]
629	#[stable(feature = "simd_x86", since = "1.27.0")]
630	pub unsafe fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
631	let zero: __m128i = _mm_setzero_si128();
632	let ret: i16x8 = simd_shuffle!(a.as_i16x8(), zero.as_i16x8(), [`0_u32`; `8`]);
633	transmute::<i16x8, _>(src:ret)
634	}
635
636	/// Broadcasts the low packed 16-bit integer from a to all elements of
637	/// the 256-bit returned value
638	///
639	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastw_epi16)
640	#[inline]
641	#[target_feature(enable = "avx2")]
642	#[cfg_attr(test, assert_instr(vpbroadcastw))]
643	#[stable(feature = "simd_x86", since = "1.27.0")]
644	pub unsafe fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
645	let zero: __m128i = _mm_setzero_si128();
646	let ret: i16x16 = simd_shuffle!(a.as_i16x8(), zero.as_i16x8(), [`0_u32`; `16`]);
647	transmute::<i16x16, _>(src:ret)
648	}
649
650	/// Compares packed 64-bit integers in `a` and `b` for equality.
651	///
652	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64)
653	#[inline]
654	#[target_feature(enable = "avx2")]
655	#[cfg_attr(test, assert_instr(vpcmpeqq))]
656	#[stable(feature = "simd_x86", since = "1.27.0")]
657	pub unsafe fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
658	transmute::<i64x4, _>(src:simd_eq(x:a.as_i64x4(), y:b.as_i64x4()))
659	}
660
661	/// Compares packed 32-bit integers in `a` and `b` for equality.
662	///
663	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32)
664	#[inline]
665	#[target_feature(enable = "avx2")]
666	#[cfg_attr(test, assert_instr(vpcmpeqd))]
667	#[stable(feature = "simd_x86", since = "1.27.0")]
668	pub unsafe fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
669	transmute::<i32x8, _>(src:simd_eq(x:a.as_i32x8(), y:b.as_i32x8()))
670	}
671
672	/// Compares packed 16-bit integers in `a` and `b` for equality.
673	///
674	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16)
675	#[inline]
676	#[target_feature(enable = "avx2")]
677	#[cfg_attr(test, assert_instr(vpcmpeqw))]
678	#[stable(feature = "simd_x86", since = "1.27.0")]
679	pub unsafe fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
680	transmute::<i16x16, _>(src:simd_eq(x:a.as_i16x16(), y:b.as_i16x16()))
681	}
682
683	/// Compares packed 8-bit integers in `a` and `b` for equality.
684	///
685	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8)
686	#[inline]
687	#[target_feature(enable = "avx2")]
688	#[cfg_attr(test, assert_instr(vpcmpeqb))]
689	#[stable(feature = "simd_x86", since = "1.27.0")]
690	pub unsafe fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
691	transmute::<i8x32, _>(src:simd_eq(x:a.as_i8x32(), y:b.as_i8x32()))
692	}
693
694	/// Compares packed 64-bit integers in `a` and `b` for greater-than.
695	///
696	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64)
697	#[inline]
698	#[target_feature(enable = "avx2")]
699	#[cfg_attr(test, assert_instr(vpcmpgtq))]
700	#[stable(feature = "simd_x86", since = "1.27.0")]
701	pub unsafe fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
702	transmute::<i64x4, _>(src:simd_gt(x:a.as_i64x4(), y:b.as_i64x4()))
703	}
704
705	/// Compares packed 32-bit integers in `a` and `b` for greater-than.
706	///
707	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32)
708	#[inline]
709	#[target_feature(enable = "avx2")]
710	#[cfg_attr(test, assert_instr(vpcmpgtd))]
711	#[stable(feature = "simd_x86", since = "1.27.0")]
712	pub unsafe fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
713	transmute::<i32x8, _>(src:simd_gt(x:a.as_i32x8(), y:b.as_i32x8()))
714	}
715
716	/// Compares packed 16-bit integers in `a` and `b` for greater-than.
717	///
718	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16)
719	#[inline]
720	#[target_feature(enable = "avx2")]
721	#[cfg_attr(test, assert_instr(vpcmpgtw))]
722	#[stable(feature = "simd_x86", since = "1.27.0")]
723	pub unsafe fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
724	transmute::<i16x16, _>(src:simd_gt(x:a.as_i16x16(), y:b.as_i16x16()))
725	}
726
727	/// Compares packed 8-bit integers in `a` and `b` for greater-than.
728	///
729	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8)
730	#[inline]
731	#[target_feature(enable = "avx2")]
732	#[cfg_attr(test, assert_instr(vpcmpgtb))]
733	#[stable(feature = "simd_x86", since = "1.27.0")]
734	pub unsafe fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
735	transmute::<i8x32, _>(src:simd_gt(x:a.as_i8x32(), y:b.as_i8x32()))
736	}
737
738	/// Sign-extend 16-bit integers to 32-bit integers.
739	///
740	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi32)
741	#[inline]
742	#[target_feature(enable = "avx2")]
743	#[cfg_attr(test, assert_instr(vpmovsxwd))]
744	#[stable(feature = "simd_x86", since = "1.27.0")]
745	pub unsafe fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
746	transmute::<i32x8, _>(src:simd_cast(a.as_i16x8()))
747	}
748
749	/// Sign-extend 16-bit integers to 64-bit integers.
750	///
751	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi64)
752	#[inline]
753	#[target_feature(enable = "avx2")]
754	#[cfg_attr(test, assert_instr(vpmovsxwq))]
755	#[stable(feature = "simd_x86", since = "1.27.0")]
756	pub unsafe fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
757	let a: i16x8 = a.as_i16x8();
758	let v64: i16x4 = simd_shuffle!(a, a, [`0`, `1`, `2`, `3`]);
759	transmute::<i64x4, _>(src:simd_cast(v64))
760	}
761
762	/// Sign-extend 32-bit integers to 64-bit integers.
763	///
764	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi64)
765	#[inline]
766	#[target_feature(enable = "avx2")]
767	#[cfg_attr(test, assert_instr(vpmovsxdq))]
768	#[stable(feature = "simd_x86", since = "1.27.0")]
769	pub unsafe fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
770	transmute::<i64x4, _>(src:simd_cast(a.as_i32x4()))
771	}
772
773	/// Sign-extend 8-bit integers to 16-bit integers.
774	///
775	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi16)
776	#[inline]
777	#[target_feature(enable = "avx2")]
778	#[cfg_attr(test, assert_instr(vpmovsxbw))]
779	#[stable(feature = "simd_x86", since = "1.27.0")]
780	pub unsafe fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
781	transmute::<i16x16, _>(src:simd_cast(a.as_i8x16()))
782	}
783
784	/// Sign-extend 8-bit integers to 32-bit integers.
785	///
786	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi32)
787	#[inline]
788	#[target_feature(enable = "avx2")]
789	#[cfg_attr(test, assert_instr(vpmovsxbd))]
790	#[stable(feature = "simd_x86", since = "1.27.0")]
791	pub unsafe fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
792	let a: i8x16 = a.as_i8x16();
793	let v64: i8x8 = simd_shuffle!(a, a, [`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`]);
794	transmute::<i32x8, _>(src:simd_cast(v64))
795	}
796
797	/// Sign-extend 8-bit integers to 64-bit integers.
798	///
799	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi64)
800	#[inline]
801	#[target_feature(enable = "avx2")]
802	#[cfg_attr(test, assert_instr(vpmovsxbq))]
803	#[stable(feature = "simd_x86", since = "1.27.0")]
804	pub unsafe fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
805	let a: i8x16 = a.as_i8x16();
806	let v32: i8x4 = simd_shuffle!(a, a, [`0`, `1`, `2`, `3`]);
807	transmute::<i64x4, _>(src:simd_cast(v32))
808	}
809
810	/// Zeroes extend packed unsigned 16-bit integers in `a` to packed 32-bit
811	/// integers, and stores the results in `dst`.
812	///
813	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu16_epi32)
814	#[inline]
815	#[target_feature(enable = "avx2")]
816	#[cfg_attr(test, assert_instr(vpmovzxwd))]
817	#[stable(feature = "simd_x86", since = "1.27.0")]
818	pub unsafe fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
819	transmute::<i32x8, _>(src:simd_cast(a.as_u16x8()))
820	}
821
822	/// Zero-extend the lower four unsigned 16-bit integers in `a` to 64-bit
823	/// integers. The upper four elements of `a` are unused.
824	///
825	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu16_epi64)
826	#[inline]
827	#[target_feature(enable = "avx2")]
828	#[cfg_attr(test, assert_instr(vpmovzxwq))]
829	#[stable(feature = "simd_x86", since = "1.27.0")]
830	pub unsafe fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
831	let a: u16x8 = a.as_u16x8();
832	let v64: u16x4 = simd_shuffle!(a, a, [`0`, `1`, `2`, `3`]);
833	transmute::<i64x4, _>(src:simd_cast(v64))
834	}
835
836	/// Zero-extend unsigned 32-bit integers in `a` to 64-bit integers.
837	///
838	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_epi64)
839	#[inline]
840	#[target_feature(enable = "avx2")]
841	#[cfg_attr(test, assert_instr(vpmovzxdq))]
842	#[stable(feature = "simd_x86", since = "1.27.0")]
843	pub unsafe fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
844	transmute::<i64x4, _>(src:simd_cast(a.as_u32x4()))
845	}
846
847	/// Zero-extend unsigned 8-bit integers in `a` to 16-bit integers.
848	///
849	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi16)
850	#[inline]
851	#[target_feature(enable = "avx2")]
852	#[cfg_attr(test, assert_instr(vpmovzxbw))]
853	#[stable(feature = "simd_x86", since = "1.27.0")]
854	pub unsafe fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
855	transmute::<i16x16, _>(src:simd_cast(a.as_u8x16()))
856	}
857
858	/// Zero-extend the lower eight unsigned 8-bit integers in `a` to 32-bit
859	/// integers. The upper eight elements of `a` are unused.
860	///
861	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi32)
862	#[inline]
863	#[target_feature(enable = "avx2")]
864	#[cfg_attr(test, assert_instr(vpmovzxbd))]
865	#[stable(feature = "simd_x86", since = "1.27.0")]
866	pub unsafe fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
867	let a: u8x16 = a.as_u8x16();
868	let v64: u8x8 = simd_shuffle!(a, a, [`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`]);
869	transmute::<i32x8, _>(src:simd_cast(v64))
870	}
871
872	/// Zero-extend the lower four unsigned 8-bit integers in `a` to 64-bit
873	/// integers. The upper twelve elements of `a` are unused.
874	///
875	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi64)
876	#[inline]
877	#[target_feature(enable = "avx2")]
878	#[cfg_attr(test, assert_instr(vpmovzxbq))]
879	#[stable(feature = "simd_x86", since = "1.27.0")]
880	pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
881	let a: u8x16 = a.as_u8x16();
882	let v32: u8x4 = simd_shuffle!(a, a, [`0`, `1`, `2`, `3`]);
883	transmute::<i64x4, _>(src:simd_cast(v32))
884	}
885
886	/// Extracts 128 bits (of integer data) from `a` selected with `IMM1`.
887	///
888	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti128_si256)
889	#[inline]
890	#[target_feature(enable = "avx2")]
891	#[cfg_attr(
892	all(test, not(target_os = "windows")),
893	assert_instr(vextractf128, IMM1 = `1`)
894	)]
895	#[rustc_legacy_const_generics(`1`)]
896	#[stable(feature = "simd_x86", since = "1.27.0")]
897	pub unsafe fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
898	static_assert_uimm_bits!(IMM1, `1`);
899	let a: i64x4 = a.as_i64x4();
900	let b: i64x4 = _mm256_undefined_si256().as_i64x4();
901	let dst: i64x2 = simd_shuffle!(a, b, [[`0`, `1`], [`2`, `3`]][IMM1 as usize]);
902	transmute(src:dst)
903	}
904
905	/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`.
906	///
907	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_epi16)
908	#[inline]
909	#[target_feature(enable = "avx2")]
910	#[cfg_attr(test, assert_instr(vphaddw))]
911	#[stable(feature = "simd_x86", since = "1.27.0")]
912	pub unsafe fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
913	transmute(src:phaddw(a:a.as_i16x16(), b:b.as_i16x16()))
914	}
915
916	/// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`.
917	///
918	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_epi32)
919	#[inline]
920	#[target_feature(enable = "avx2")]
921	#[cfg_attr(test, assert_instr(vphaddd))]
922	#[stable(feature = "simd_x86", since = "1.27.0")]
923	pub unsafe fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
924	transmute(src:phaddd(a:a.as_i32x8(), b:b.as_i32x8()))
925	}
926
927	/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`
928	/// using saturation.
929	///
930	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadds_epi16)
931	#[inline]
932	#[target_feature(enable = "avx2")]
933	#[cfg_attr(test, assert_instr(vphaddsw))]
934	#[stable(feature = "simd_x86", since = "1.27.0")]
935	pub unsafe fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
936	transmute(src:phaddsw(a:a.as_i16x16(), b:b.as_i16x16()))
937	}
938
939	/// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`.
940	///
941	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_epi16)
942	#[inline]
943	#[target_feature(enable = "avx2")]
944	#[cfg_attr(test, assert_instr(vphsubw))]
945	#[stable(feature = "simd_x86", since = "1.27.0")]
946	pub unsafe fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
947	transmute(src:phsubw(a:a.as_i16x16(), b:b.as_i16x16()))
948	}
949
950	/// Horizontally subtract adjacent pairs of 32-bit integers in `a` and `b`.
951	///
952	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_epi32)
953	#[inline]
954	#[target_feature(enable = "avx2")]
955	#[cfg_attr(test, assert_instr(vphsubd))]
956	#[stable(feature = "simd_x86", since = "1.27.0")]
957	pub unsafe fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
958	transmute(src:phsubd(a:a.as_i32x8(), b:b.as_i32x8()))
959	}
960
961	/// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`
962	/// using saturation.
963	///
964	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsubs_epi16)
965	#[inline]
966	#[target_feature(enable = "avx2")]
967	#[cfg_attr(test, assert_instr(vphsubsw))]
968	#[stable(feature = "simd_x86", since = "1.27.0")]
969	pub unsafe fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
970	transmute(src:phsubsw(a:a.as_i16x16(), b:b.as_i16x16()))
971	}
972
973	/// Returns values from `slice` at offsets determined by `offsets scale`,*
974	/// where
975	/// `scale` should be 1, 2, 4 or 8.
976	///
977	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_epi32)
978	#[inline]
979	#[target_feature(enable = "avx2")]
980	#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = `1`))]
981	#[rustc_legacy_const_generics(`2`)]
982	#[stable(feature = "simd_x86", since = "1.27.0")]
983	pub unsafe fn _mm_i32gather_epi32<const SCALE: i32>(
984	slice: *const i32,
985	offsets: __m128i,
986	) -> __m128i {
987	static_assert_imm8_scale!(SCALE);
988	let zero: i32x4 = _mm_setzero_si128().as_i32x4();
989	let neg_one: i32x4 = _mm_set1_epi32(`-1`).as_i32x4();
990	let offsets: i32x4 = offsets.as_i32x4();
991	let slice: const i8 = slice as const i8;
992	let r: i32x4 = pgatherdd(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
993	transmute(src:r)
994	}
995
996	/// Returns values from `slice` at offsets determined by `offsets scale`,*
997	/// where
998	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
999	/// that position instead.
1000	///
1001	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_epi32)
1002	#[inline]
1003	#[target_feature(enable = "avx2")]
1004	#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = `1`))]
1005	#[rustc_legacy_const_generics(`4`)]
1006	#[stable(feature = "simd_x86", since = "1.27.0")]
1007	pub unsafe fn _mm_mask_i32gather_epi32<const SCALE: i32>(
1008	src: __m128i,
1009	slice: *const i32,
1010	offsets: __m128i,
1011	mask: __m128i,
1012	) -> __m128i {
1013	static_assert_imm8_scale!(SCALE);
1014	let src: i32x4 = src.as_i32x4();
1015	let mask: i32x4 = mask.as_i32x4();
1016	let offsets: i32x4 = offsets.as_i32x4();
1017	let slice: const i8 = slice as const i8;
1018	let r: i32x4 = pgatherdd(src, slice, offsets, mask, SCALE as i8);
1019	transmute(src:r)
1020	}
1021
1022	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1023	/// where
1024	/// `scale` should be 1, 2, 4 or 8.
1025	///
1026	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_epi32)
1027	#[inline]
1028	#[target_feature(enable = "avx2")]
1029	#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = `1`))]
1030	#[rustc_legacy_const_generics(`2`)]
1031	#[stable(feature = "simd_x86", since = "1.27.0")]
1032	pub unsafe fn _mm256_i32gather_epi32<const SCALE: i32>(
1033	slice: *const i32,
1034	offsets: __m256i,
1035	) -> __m256i {
1036	static_assert_imm8_scale!(SCALE);
1037	let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
1038	let neg_one: i32x8 = _mm256_set1_epi32(`-1`).as_i32x8();
1039	let offsets: i32x8 = offsets.as_i32x8();
1040	let slice: const i8 = slice as const i8;
1041	let r: i32x8 = vpgatherdd(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1042	transmute(src:r)
1043	}
1044
1045	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1046	/// where
1047	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1048	/// that position instead.
1049	///
1050	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_epi32)
1051	#[inline]
1052	#[target_feature(enable = "avx2")]
1053	#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = `1`))]
1054	#[rustc_legacy_const_generics(`4`)]
1055	#[stable(feature = "simd_x86", since = "1.27.0")]
1056	pub unsafe fn _mm256_mask_i32gather_epi32<const SCALE: i32>(
1057	src: __m256i,
1058	slice: *const i32,
1059	offsets: __m256i,
1060	mask: __m256i,
1061	) -> __m256i {
1062	static_assert_imm8_scale!(SCALE);
1063	let src: i32x8 = src.as_i32x8();
1064	let mask: i32x8 = mask.as_i32x8();
1065	let offsets: i32x8 = offsets.as_i32x8();
1066	let slice: const i8 = slice as const i8;
1067	let r: i32x8 = vpgatherdd(src, slice, offsets, mask, SCALE as i8);
1068	transmute(src:r)
1069	}
1070
1071	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1072	/// where
1073	/// `scale` should be 1, 2, 4 or 8.
1074	///
1075	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_ps)
1076	#[inline]
1077	#[target_feature(enable = "avx2")]
1078	#[cfg_attr(test, assert_instr(vgatherdps, SCALE = `1`))]
1079	#[rustc_legacy_const_generics(`2`)]
1080	#[stable(feature = "simd_x86", since = "1.27.0")]
1081	pub unsafe fn _mm_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
1082	static_assert_imm8_scale!(SCALE);
1083	let zero: __m128 = _mm_setzero_ps();
1084	let neg_one: __m128 = _mm_set1_ps(`-1.0`);
1085	let offsets: i32x4 = offsets.as_i32x4();
1086	let slice: const i8 = slice as const i8;
1087	pgatherdps(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1088	}
1089
1090	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1091	/// where
1092	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1093	/// that position instead.
1094	///
1095	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_ps)
1096	#[inline]
1097	#[target_feature(enable = "avx2")]
1098	#[cfg_attr(test, assert_instr(vgatherdps, SCALE = `1`))]
1099	#[rustc_legacy_const_generics(`4`)]
1100	#[stable(feature = "simd_x86", since = "1.27.0")]
1101	pub unsafe fn _mm_mask_i32gather_ps<const SCALE: i32>(
1102	src: __m128,
1103	slice: *const f32,
1104	offsets: __m128i,
1105	mask: __m128,
1106	) -> __m128 {
1107	static_assert_imm8_scale!(SCALE);
1108	let offsets: i32x4 = offsets.as_i32x4();
1109	let slice: const i8 = slice as const i8;
1110	pgatherdps(src, slice, offsets, mask, SCALE as i8)
1111	}
1112
1113	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1114	/// where
1115	/// `scale` should be 1, 2, 4 or 8.
1116	///
1117	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_ps)
1118	#[inline]
1119	#[target_feature(enable = "avx2")]
1120	#[cfg_attr(test, assert_instr(vgatherdps, SCALE = `1`))]
1121	#[rustc_legacy_const_generics(`2`)]
1122	#[stable(feature = "simd_x86", since = "1.27.0")]
1123	pub unsafe fn _mm256_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m256 {
1124	static_assert_imm8_scale!(SCALE);
1125	let zero: __m256 = _mm256_setzero_ps();
1126	let neg_one: __m256 = _mm256_set1_ps(`-1.0`);
1127	let offsets: i32x8 = offsets.as_i32x8();
1128	let slice: const i8 = slice as const i8;
1129	vpgatherdps(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1130	}
1131
1132	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1133	/// where
1134	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1135	/// that position instead.
1136	///
1137	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_ps)
1138	#[inline]
1139	#[target_feature(enable = "avx2")]
1140	#[cfg_attr(test, assert_instr(vgatherdps, SCALE = `1`))]
1141	#[rustc_legacy_const_generics(`4`)]
1142	#[stable(feature = "simd_x86", since = "1.27.0")]
1143	pub unsafe fn _mm256_mask_i32gather_ps<const SCALE: i32>(
1144	src: __m256,
1145	slice: *const f32,
1146	offsets: __m256i,
1147	mask: __m256,
1148	) -> __m256 {
1149	static_assert_imm8_scale!(SCALE);
1150	let offsets: i32x8 = offsets.as_i32x8();
1151	let slice: const i8 = slice as const i8;
1152	vpgatherdps(src, slice, offsets, mask, SCALE as i8)
1153	}
1154
1155	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1156	/// where
1157	/// `scale` should be 1, 2, 4 or 8.
1158	///
1159	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_epi64)
1160	#[inline]
1161	#[target_feature(enable = "avx2")]
1162	#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = `1`))]
1163	#[rustc_legacy_const_generics(`2`)]
1164	#[stable(feature = "simd_x86", since = "1.27.0")]
1165	pub unsafe fn _mm_i32gather_epi64<const SCALE: i32>(
1166	slice: *const i64,
1167	offsets: __m128i,
1168	) -> __m128i {
1169	static_assert_imm8_scale!(SCALE);
1170	let zero: i64x2 = _mm_setzero_si128().as_i64x2();
1171	let neg_one: i64x2 = _mm_set1_epi64x(`-1`).as_i64x2();
1172	let offsets: i32x4 = offsets.as_i32x4();
1173	let slice: const i8 = slice as const i8;
1174	let r: i64x2 = pgatherdq(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1175	transmute(src:r)
1176	}
1177
1178	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1179	/// where
1180	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1181	/// that position instead.
1182	///
1183	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_epi64)
1184	#[inline]
1185	#[target_feature(enable = "avx2")]
1186	#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = `1`))]
1187	#[rustc_legacy_const_generics(`4`)]
1188	#[stable(feature = "simd_x86", since = "1.27.0")]
1189	pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>(
1190	src: __m128i,
1191	slice: *const i64,
1192	offsets: __m128i,
1193	mask: __m128i,
1194	) -> __m128i {
1195	static_assert_imm8_scale!(SCALE);
1196	let src: i64x2 = src.as_i64x2();
1197	let mask: i64x2 = mask.as_i64x2();
1198	let offsets: i32x4 = offsets.as_i32x4();
1199	let slice: const i8 = slice as const i8;
1200	let r: i64x2 = pgatherdq(src, slice, offsets, mask, SCALE as i8);
1201	transmute(src:r)
1202	}
1203
1204	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1205	/// where
1206	/// `scale` should be 1, 2, 4 or 8.
1207	///
1208	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_epi64)
1209	#[inline]
1210	#[target_feature(enable = "avx2")]
1211	#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = `1`))]
1212	#[rustc_legacy_const_generics(`2`)]
1213	#[stable(feature = "simd_x86", since = "1.27.0")]
1214	pub unsafe fn _mm256_i32gather_epi64<const SCALE: i32>(
1215	slice: *const i64,
1216	offsets: __m128i,
1217	) -> __m256i {
1218	static_assert_imm8_scale!(SCALE);
1219	let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
1220	let neg_one: i64x4 = _mm256_set1_epi64x(`-1`).as_i64x4();
1221	let offsets: i32x4 = offsets.as_i32x4();
1222	let slice: const i8 = slice as const i8;
1223	let r: i64x4 = vpgatherdq(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1224	transmute(src:r)
1225	}
1226
1227	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1228	/// where
1229	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1230	/// that position instead.
1231	///
1232	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_epi64)
1233	#[inline]
1234	#[target_feature(enable = "avx2")]
1235	#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = `1`))]
1236	#[rustc_legacy_const_generics(`4`)]
1237	#[stable(feature = "simd_x86", since = "1.27.0")]
1238	pub unsafe fn _mm256_mask_i32gather_epi64<const SCALE: i32>(
1239	src: __m256i,
1240	slice: *const i64,
1241	offsets: __m128i,
1242	mask: __m256i,
1243	) -> __m256i {
1244	static_assert_imm8_scale!(SCALE);
1245	let src: i64x4 = src.as_i64x4();
1246	let mask: i64x4 = mask.as_i64x4();
1247	let offsets: i32x4 = offsets.as_i32x4();
1248	let slice: const i8 = slice as const i8;
1249	let r: i64x4 = vpgatherdq(src, slice, offsets, mask, SCALE as i8);
1250	transmute(src:r)
1251	}
1252
1253	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1254	/// where
1255	/// `scale` should be 1, 2, 4 or 8.
1256	///
1257	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_pd)
1258	#[inline]
1259	#[target_feature(enable = "avx2")]
1260	#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = `1`))]
1261	#[rustc_legacy_const_generics(`2`)]
1262	#[stable(feature = "simd_x86", since = "1.27.0")]
1263	pub unsafe fn _mm_i32gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
1264	static_assert_imm8_scale!(SCALE);
1265	let zero: __m128d = _mm_setzero_pd();
1266	let neg_one: __m128d = _mm_set1_pd(`-1.0`);
1267	let offsets: i32x4 = offsets.as_i32x4();
1268	let slice: const i8 = slice as const i8;
1269	pgatherdpd(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1270	}
1271
1272	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1273	/// where
1274	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1275	/// that position instead.
1276	///
1277	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_pd)
1278	#[inline]
1279	#[target_feature(enable = "avx2")]
1280	#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = `1`))]
1281	#[rustc_legacy_const_generics(`4`)]
1282	#[stable(feature = "simd_x86", since = "1.27.0")]
1283	pub unsafe fn _mm_mask_i32gather_pd<const SCALE: i32>(
1284	src: __m128d,
1285	slice: *const f64,
1286	offsets: __m128i,
1287	mask: __m128d,
1288	) -> __m128d {
1289	static_assert_imm8_scale!(SCALE);
1290	let offsets: i32x4 = offsets.as_i32x4();
1291	let slice: const i8 = slice as const i8;
1292	pgatherdpd(src, slice, offsets, mask, SCALE as i8)
1293	}
1294
1295	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1296	/// where
1297	/// `scale` should be 1, 2, 4 or 8.
1298	///
1299	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_pd)
1300	#[inline]
1301	#[target_feature(enable = "avx2")]
1302	#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = `1`))]
1303	#[rustc_legacy_const_generics(`2`)]
1304	#[stable(feature = "simd_x86", since = "1.27.0")]
1305	pub unsafe fn _mm256_i32gather_pd<const SCALE: i32>(
1306	slice: *const f64,
1307	offsets: __m128i,
1308	) -> __m256d {
1309	static_assert_imm8_scale!(SCALE);
1310	let zero: __m256d = _mm256_setzero_pd();
1311	let neg_one: __m256d = _mm256_set1_pd(`-1.0`);
1312	let offsets: i32x4 = offsets.as_i32x4();
1313	let slice: const i8 = slice as const i8;
1314	vpgatherdpd(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1315	}
1316
1317	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1318	/// where
1319	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1320	/// that position instead.
1321	///
1322	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_pd)
1323	#[inline]
1324	#[target_feature(enable = "avx2")]
1325	#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = `1`))]
1326	#[rustc_legacy_const_generics(`4`)]
1327	#[stable(feature = "simd_x86", since = "1.27.0")]
1328	pub unsafe fn _mm256_mask_i32gather_pd<const SCALE: i32>(
1329	src: __m256d,
1330	slice: *const f64,
1331	offsets: __m128i,
1332	mask: __m256d,
1333	) -> __m256d {
1334	static_assert_imm8_scale!(SCALE);
1335	let offsets: i32x4 = offsets.as_i32x4();
1336	let slice: const i8 = slice as const i8;
1337	vpgatherdpd(src, slice, offsets, mask, SCALE as i8)
1338	}
1339
1340	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1341	/// where
1342	/// `scale` should be 1, 2, 4 or 8.
1343	///
1344	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_epi32)
1345	#[inline]
1346	#[target_feature(enable = "avx2")]
1347	#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = `1`))]
1348	#[rustc_legacy_const_generics(`2`)]
1349	#[stable(feature = "simd_x86", since = "1.27.0")]
1350	pub unsafe fn _mm_i64gather_epi32<const SCALE: i32>(
1351	slice: *const i32,
1352	offsets: __m128i,
1353	) -> __m128i {
1354	static_assert_imm8_scale!(SCALE);
1355	let zero: i32x4 = _mm_setzero_si128().as_i32x4();
1356	let neg_one: i32x4 = _mm_set1_epi64x(`-1`).as_i32x4();
1357	let offsets: i64x2 = offsets.as_i64x2();
1358	let slice: const i8 = slice as const i8;
1359	let r: i32x4 = pgatherqd(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1360	transmute(src:r)
1361	}
1362
1363	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1364	/// where
1365	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1366	/// that position instead.
1367	///
1368	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_epi32)
1369	#[inline]
1370	#[target_feature(enable = "avx2")]
1371	#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = `1`))]
1372	#[rustc_legacy_const_generics(`4`)]
1373	#[stable(feature = "simd_x86", since = "1.27.0")]
1374	pub unsafe fn _mm_mask_i64gather_epi32<const SCALE: i32>(
1375	src: __m128i,
1376	slice: *const i32,
1377	offsets: __m128i,
1378	mask: __m128i,
1379	) -> __m128i {
1380	static_assert_imm8_scale!(SCALE);
1381	let src: i32x4 = src.as_i32x4();
1382	let mask: i32x4 = mask.as_i32x4();
1383	let offsets: i64x2 = offsets.as_i64x2();
1384	let slice: const i8 = slice as const i8;
1385	let r: i32x4 = pgatherqd(src, slice, offsets, mask, SCALE as i8);
1386	transmute(src:r)
1387	}
1388
1389	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1390	/// where
1391	/// `scale` should be 1, 2, 4 or 8.
1392	///
1393	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_epi32)
1394	#[inline]
1395	#[target_feature(enable = "avx2")]
1396	#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = `1`))]
1397	#[rustc_legacy_const_generics(`2`)]
1398	#[stable(feature = "simd_x86", since = "1.27.0")]
1399	pub unsafe fn _mm256_i64gather_epi32<const SCALE: i32>(
1400	slice: *const i32,
1401	offsets: __m256i,
1402	) -> __m128i {
1403	static_assert_imm8_scale!(SCALE);
1404	let zero: i32x4 = _mm_setzero_si128().as_i32x4();
1405	let neg_one: i32x4 = _mm_set1_epi64x(`-1`).as_i32x4();
1406	let offsets: i64x4 = offsets.as_i64x4();
1407	let slice: const i8 = slice as const i8;
1408	let r: i32x4 = vpgatherqd(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1409	transmute(src:r)
1410	}
1411
1412	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1413	/// where
1414	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1415	/// that position instead.
1416	///
1417	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_epi32)
1418	#[inline]
1419	#[target_feature(enable = "avx2")]
1420	#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = `1`))]
1421	#[rustc_legacy_const_generics(`4`)]
1422	#[stable(feature = "simd_x86", since = "1.27.0")]
1423	pub unsafe fn _mm256_mask_i64gather_epi32<const SCALE: i32>(
1424	src: __m128i,
1425	slice: *const i32,
1426	offsets: __m256i,
1427	mask: __m128i,
1428	) -> __m128i {
1429	static_assert_imm8_scale!(SCALE);
1430	let src: i32x4 = src.as_i32x4();
1431	let mask: i32x4 = mask.as_i32x4();
1432	let offsets: i64x4 = offsets.as_i64x4();
1433	let slice: const i8 = slice as const i8;
1434	let r: i32x4 = vpgatherqd(src, slice, offsets, mask, SCALE as i8);
1435	transmute(src:r)
1436	}
1437
1438	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1439	/// where
1440	/// `scale` should be 1, 2, 4 or 8.
1441	///
1442	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_ps)
1443	#[inline]
1444	#[target_feature(enable = "avx2")]
1445	#[cfg_attr(test, assert_instr(vgatherqps, SCALE = `1`))]
1446	#[rustc_legacy_const_generics(`2`)]
1447	#[stable(feature = "simd_x86", since = "1.27.0")]
1448	pub unsafe fn _mm_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
1449	static_assert_imm8_scale!(SCALE);
1450	let zero: __m128 = _mm_setzero_ps();
1451	let neg_one: __m128 = _mm_set1_ps(`-1.0`);
1452	let offsets: i64x2 = offsets.as_i64x2();
1453	let slice: const i8 = slice as const i8;
1454	pgatherqps(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1455	}
1456
1457	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1458	/// where
1459	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1460	/// that position instead.
1461	///
1462	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_ps)
1463	#[inline]
1464	#[target_feature(enable = "avx2")]
1465	#[cfg_attr(test, assert_instr(vgatherqps, SCALE = `1`))]
1466	#[rustc_legacy_const_generics(`4`)]
1467	#[stable(feature = "simd_x86", since = "1.27.0")]
1468	pub unsafe fn _mm_mask_i64gather_ps<const SCALE: i32>(
1469	src: __m128,
1470	slice: *const f32,
1471	offsets: __m128i,
1472	mask: __m128,
1473	) -> __m128 {
1474	static_assert_imm8_scale!(SCALE);
1475	let offsets: i64x2 = offsets.as_i64x2();
1476	let slice: const i8 = slice as const i8;
1477	pgatherqps(src, slice, offsets, mask, SCALE as i8)
1478	}
1479
1480	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1481	/// where
1482	/// `scale` should be 1, 2, 4 or 8.
1483	///
1484	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_ps)
1485	#[inline]
1486	#[target_feature(enable = "avx2")]
1487	#[cfg_attr(test, assert_instr(vgatherqps, SCALE = `1`))]
1488	#[rustc_legacy_const_generics(`2`)]
1489	#[stable(feature = "simd_x86", since = "1.27.0")]
1490	pub unsafe fn _mm256_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m128 {
1491	static_assert_imm8_scale!(SCALE);
1492	let zero: __m128 = _mm_setzero_ps();
1493	let neg_one: __m128 = _mm_set1_ps(`-1.0`);
1494	let offsets: i64x4 = offsets.as_i64x4();
1495	let slice: const i8 = slice as const i8;
1496	vpgatherqps(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1497	}
1498
1499	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1500	/// where
1501	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1502	/// that position instead.
1503	///
1504	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_ps)
1505	#[inline]
1506	#[target_feature(enable = "avx2")]
1507	#[cfg_attr(test, assert_instr(vgatherqps, SCALE = `1`))]
1508	#[rustc_legacy_const_generics(`4`)]
1509	#[stable(feature = "simd_x86", since = "1.27.0")]
1510	pub unsafe fn _mm256_mask_i64gather_ps<const SCALE: i32>(
1511	src: __m128,
1512	slice: *const f32,
1513	offsets: __m256i,
1514	mask: __m128,
1515	) -> __m128 {
1516	static_assert_imm8_scale!(SCALE);
1517	let offsets: i64x4 = offsets.as_i64x4();
1518	let slice: const i8 = slice as const i8;
1519	vpgatherqps(src, slice, offsets, mask, SCALE as i8)
1520	}
1521
1522	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1523	/// where
1524	/// `scale` should be 1, 2, 4 or 8.
1525	///
1526	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_epi64)
1527	#[inline]
1528	#[target_feature(enable = "avx2")]
1529	#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = `1`))]
1530	#[rustc_legacy_const_generics(`2`)]
1531	#[stable(feature = "simd_x86", since = "1.27.0")]
1532	pub unsafe fn _mm_i64gather_epi64<const SCALE: i32>(
1533	slice: *const i64,
1534	offsets: __m128i,
1535	) -> __m128i {
1536	static_assert_imm8_scale!(SCALE);
1537	let zero: i64x2 = _mm_setzero_si128().as_i64x2();
1538	let neg_one: i64x2 = _mm_set1_epi64x(`-1`).as_i64x2();
1539	let slice: const i8 = slice as const i8;
1540	let offsets: i64x2 = offsets.as_i64x2();
1541	let r: i64x2 = pgatherqq(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1542	transmute(src:r)
1543	}
1544
1545	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1546	/// where
1547	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1548	/// that position instead.
1549	///
1550	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_epi64)
1551	#[inline]
1552	#[target_feature(enable = "avx2")]
1553	#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = `1`))]
1554	#[rustc_legacy_const_generics(`4`)]
1555	#[stable(feature = "simd_x86", since = "1.27.0")]
1556	pub unsafe fn _mm_mask_i64gather_epi64<const SCALE: i32>(
1557	src: __m128i,
1558	slice: *const i64,
1559	offsets: __m128i,
1560	mask: __m128i,
1561	) -> __m128i {
1562	static_assert_imm8_scale!(SCALE);
1563	let src: i64x2 = src.as_i64x2();
1564	let mask: i64x2 = mask.as_i64x2();
1565	let offsets: i64x2 = offsets.as_i64x2();
1566	let slice: const i8 = slice as const i8;
1567	let r: i64x2 = pgatherqq(src, slice, offsets, mask, SCALE as i8);
1568	transmute(src:r)
1569	}
1570
1571	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1572	/// where
1573	/// `scale` should be 1, 2, 4 or 8.
1574	///
1575	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_epi64)
1576	#[inline]
1577	#[target_feature(enable = "avx2")]
1578	#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = `1`))]
1579	#[rustc_legacy_const_generics(`2`)]
1580	#[stable(feature = "simd_x86", since = "1.27.0")]
1581	pub unsafe fn _mm256_i64gather_epi64<const SCALE: i32>(
1582	slice: *const i64,
1583	offsets: __m256i,
1584	) -> __m256i {
1585	static_assert_imm8_scale!(SCALE);
1586	let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
1587	let neg_one: i64x4 = _mm256_set1_epi64x(`-1`).as_i64x4();
1588	let slice: const i8 = slice as const i8;
1589	let offsets: i64x4 = offsets.as_i64x4();
1590	let r: i64x4 = vpgatherqq(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1591	transmute(src:r)
1592	}
1593
1594	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1595	/// where
1596	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1597	/// that position instead.
1598	///
1599	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_epi64)
1600	#[inline]
1601	#[target_feature(enable = "avx2")]
1602	#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = `1`))]
1603	#[rustc_legacy_const_generics(`4`)]
1604	#[stable(feature = "simd_x86", since = "1.27.0")]
1605	pub unsafe fn _mm256_mask_i64gather_epi64<const SCALE: i32>(
1606	src: __m256i,
1607	slice: *const i64,
1608	offsets: __m256i,
1609	mask: __m256i,
1610	) -> __m256i {
1611	static_assert_imm8_scale!(SCALE);
1612	let src: i64x4 = src.as_i64x4();
1613	let mask: i64x4 = mask.as_i64x4();
1614	let offsets: i64x4 = offsets.as_i64x4();
1615	let slice: const i8 = slice as const i8;
1616	let r: i64x4 = vpgatherqq(src, slice, offsets, mask, SCALE as i8);
1617	transmute(src:r)
1618	}
1619
1620	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1621	/// where
1622	/// `scale` should be 1, 2, 4 or 8.
1623	///
1624	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_pd)
1625	#[inline]
1626	#[target_feature(enable = "avx2")]
1627	#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = `1`))]
1628	#[rustc_legacy_const_generics(`2`)]
1629	#[stable(feature = "simd_x86", since = "1.27.0")]
1630	pub unsafe fn _mm_i64gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
1631	static_assert_imm8_scale!(SCALE);
1632	let zero: __m128d = _mm_setzero_pd();
1633	let neg_one: __m128d = _mm_set1_pd(`-1.0`);
1634	let slice: const i8 = slice as const i8;
1635	let offsets: i64x2 = offsets.as_i64x2();
1636	pgatherqpd(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1637	}
1638
1639	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1640	/// where
1641	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1642	/// that position instead.
1643	///
1644	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_pd)
1645	#[inline]
1646	#[target_feature(enable = "avx2")]
1647	#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = `1`))]
1648	#[rustc_legacy_const_generics(`4`)]
1649	#[stable(feature = "simd_x86", since = "1.27.0")]
1650	pub unsafe fn _mm_mask_i64gather_pd<const SCALE: i32>(
1651	src: __m128d,
1652	slice: *const f64,
1653	offsets: __m128i,
1654	mask: __m128d,
1655	) -> __m128d {
1656	static_assert_imm8_scale!(SCALE);
1657	let slice: const i8 = slice as const i8;
1658	let offsets: i64x2 = offsets.as_i64x2();
1659	pgatherqpd(src, slice, offsets, mask, SCALE as i8)
1660	}
1661
1662	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1663	/// where
1664	/// `scale` should be 1, 2, 4 or 8.
1665	///
1666	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_pd)
1667	#[inline]
1668	#[target_feature(enable = "avx2")]
1669	#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = `1`))]
1670	#[rustc_legacy_const_generics(`2`)]
1671	#[stable(feature = "simd_x86", since = "1.27.0")]
1672	pub unsafe fn _mm256_i64gather_pd<const SCALE: i32>(
1673	slice: *const f64,
1674	offsets: __m256i,
1675	) -> __m256d {
1676	static_assert_imm8_scale!(SCALE);
1677	let zero: __m256d = _mm256_setzero_pd();
1678	let neg_one: __m256d = _mm256_set1_pd(`-1.0`);
1679	let slice: const i8 = slice as const i8;
1680	let offsets: i64x4 = offsets.as_i64x4();
1681	vpgatherqpd(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1682	}
1683
1684	/// Returns values from `slice` at offsets determined by `offsets scale`,*
1685	/// where
1686	/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1687	/// that position instead.
1688	///
1689	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_pd)
1690	#[inline]
1691	#[target_feature(enable = "avx2")]
1692	#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = `1`))]
1693	#[rustc_legacy_const_generics(`4`)]
1694	#[stable(feature = "simd_x86", since = "1.27.0")]
1695	pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>(
1696	src: __m256d,
1697	slice: *const f64,
1698	offsets: __m256i,
1699	mask: __m256d,
1700	) -> __m256d {
1701	static_assert_imm8_scale!(SCALE);
1702	let slice: const i8 = slice as const i8;
1703	let offsets: i64x4 = offsets.as_i64x4();
1704	vpgatherqpd(src, slice, offsets, mask, SCALE as i8)
1705	}
1706
1707	/// Copies `a` to `dst`, then insert 128 bits (of integer data) from `b` at the
1708	/// location specified by `IMM1`.
1709	///
1710	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti128_si256)
1711	#[inline]
1712	#[target_feature(enable = "avx2")]
1713	#[cfg_attr(
1714	all(test, not(target_os = "windows")),
1715	assert_instr(vinsertf128, IMM1 = `1`)
1716	)]
1717	#[rustc_legacy_const_generics(`2`)]
1718	#[stable(feature = "simd_x86", since = "1.27.0")]
1719	pub unsafe fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
1720	static_assert_uimm_bits!(IMM1, `1`);
1721	let a: i64x4 = a.as_i64x4();
1722	let b: i64x4 = _mm256_castsi128_si256(b).as_i64x4();
1723	let dst: i64x4 = simd_shuffle!(a, b, [[`4`, `5`, `2`, `3`], [`0`, `1`, `4`, `5`]][IMM1 as usize]);
1724	transmute(src:dst)
1725	}
1726
1727	/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
1728	/// intermediate signed 32-bit integers. Horizontally add adjacent pairs
1729	/// of intermediate 32-bit integers.
1730	///
1731	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd_epi16)
1732	#[inline]
1733	#[target_feature(enable = "avx2")]
1734	#[cfg_attr(test, assert_instr(vpmaddwd))]
1735	#[stable(feature = "simd_x86", since = "1.27.0")]
1736	pub unsafe fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
1737	transmute(src:pmaddwd(a:a.as_i16x16(), b:b.as_i16x16()))
1738	}
1739
1740	/// Vertically multiplies each unsigned 8-bit integer from `a` with the
1741	/// corresponding signed 8-bit integer from `b`, producing intermediate
1742	/// signed 16-bit integers. Horizontally add adjacent pairs of intermediate
1743	/// signed 16-bit integers
1744	///
1745	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maddubs_epi16)
1746	#[inline]
1747	#[target_feature(enable = "avx2")]
1748	#[cfg_attr(test, assert_instr(vpmaddubsw))]
1749	#[stable(feature = "simd_x86", since = "1.27.0")]
1750	pub unsafe fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
1751	transmute(src:pmaddubsw(a:a.as_u8x32(), b:b.as_u8x32()))
1752	}
1753
1754	/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
1755	/// (elements are zeroed out when the highest bit is not set in the
1756	/// corresponding element).
1757	///
1758	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_epi32)
1759	#[inline]
1760	#[target_feature(enable = "avx2")]
1761	#[cfg_attr(test, assert_instr(vpmaskmovd))]
1762	#[stable(feature = "simd_x86", since = "1.27.0")]
1763	pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
1764	transmute(src:maskloadd(mem_addr as *const i8, mask:mask.as_i32x4()))
1765	}
1766
1767	/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
1768	/// (elements are zeroed out when the highest bit is not set in the
1769	/// corresponding element).
1770	///
1771	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_epi32)
1772	#[inline]
1773	#[target_feature(enable = "avx2")]
1774	#[cfg_attr(test, assert_instr(vpmaskmovd))]
1775	#[stable(feature = "simd_x86", since = "1.27.0")]
1776	pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
1777	transmute(src:maskloadd256(mem_addr as *const i8, mask:mask.as_i32x8()))
1778	}
1779
1780	/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask`
1781	/// (elements are zeroed out when the highest bit is not set in the
1782	/// corresponding element).
1783	///
1784	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_epi64)
1785	#[inline]
1786	#[target_feature(enable = "avx2")]
1787	#[cfg_attr(test, assert_instr(vpmaskmovq))]
1788	#[stable(feature = "simd_x86", since = "1.27.0")]
1789	pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
1790	transmute(src:maskloadq(mem_addr as *const i8, mask:mask.as_i64x2()))
1791	}
1792
1793	/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask`
1794	/// (elements are zeroed out when the highest bit is not set in the
1795	/// corresponding element).
1796	///
1797	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_epi64)
1798	#[inline]
1799	#[target_feature(enable = "avx2")]
1800	#[cfg_attr(test, assert_instr(vpmaskmovq))]
1801	#[stable(feature = "simd_x86", since = "1.27.0")]
1802	pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
1803	transmute(src:maskloadq256(mem_addr as *const i8, mask:mask.as_i64x4()))
1804	}
1805
1806	/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr`
1807	/// using `mask` (elements are not stored when the highest bit is not set
1808	/// in the corresponding element).
1809	///
1810	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_epi32)
1811	#[inline]
1812	#[target_feature(enable = "avx2")]
1813	#[cfg_attr(test, assert_instr(vpmaskmovd))]
1814	#[stable(feature = "simd_x86", since = "1.27.0")]
1815	pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
1816	maskstored(mem_addr as *mut i8, mask:mask.as_i32x4(), a:a.as_i32x4())
1817	}
1818
1819	/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr`
1820	/// using `mask` (elements are not stored when the highest bit is not set
1821	/// in the corresponding element).
1822	///
1823	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_epi32)
1824	#[inline]
1825	#[target_feature(enable = "avx2")]
1826	#[cfg_attr(test, assert_instr(vpmaskmovd))]
1827	#[stable(feature = "simd_x86", since = "1.27.0")]
1828	pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
1829	maskstored256(mem_addr as *mut i8, mask:mask.as_i32x8(), a:a.as_i32x8())
1830	}
1831
1832	/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr`
1833	/// using `mask` (elements are not stored when the highest bit is not set
1834	/// in the corresponding element).
1835	///
1836	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_epi64)
1837	#[inline]
1838	#[target_feature(enable = "avx2")]
1839	#[cfg_attr(test, assert_instr(vpmaskmovq))]
1840	#[stable(feature = "simd_x86", since = "1.27.0")]
1841	pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
1842	maskstoreq(mem_addr as *mut i8, mask:mask.as_i64x2(), a:a.as_i64x2())
1843	}
1844
1845	/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr`
1846	/// using `mask` (elements are not stored when the highest bit is not set
1847	/// in the corresponding element).
1848	///
1849	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_epi64)
1850	#[inline]
1851	#[target_feature(enable = "avx2")]
1852	#[cfg_attr(test, assert_instr(vpmaskmovq))]
1853	#[stable(feature = "simd_x86", since = "1.27.0")]
1854	pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
1855	maskstoreq256(mem_addr as *mut i8, mask:mask.as_i64x4(), a:a.as_i64x4())
1856	}
1857
1858	/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
1859	/// maximum values.
1860	///
1861	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi16)
1862	#[inline]
1863	#[target_feature(enable = "avx2")]
1864	#[cfg_attr(test, assert_instr(vpmaxsw))]
1865	#[stable(feature = "simd_x86", since = "1.27.0")]
1866	pub unsafe fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
1867	let a: i16x16 = a.as_i16x16();
1868	let b: i16x16 = b.as_i16x16();
1869	transmute(src:simd_select::<i16x16, _>(m:simd_gt(x:a, y:b), a, b))
1870	}
1871
1872	/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
1873	/// maximum values.
1874	///
1875	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi32)
1876	#[inline]
1877	#[target_feature(enable = "avx2")]
1878	#[cfg_attr(test, assert_instr(vpmaxsd))]
1879	#[stable(feature = "simd_x86", since = "1.27.0")]
1880	pub unsafe fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
1881	let a: i32x8 = a.as_i32x8();
1882	let b: i32x8 = b.as_i32x8();
1883	transmute(src:simd_select::<i32x8, _>(m:simd_gt(x:a, y:b), a, b))
1884	}
1885
1886	/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
1887	/// maximum values.
1888	///
1889	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi8)
1890	#[inline]
1891	#[target_feature(enable = "avx2")]
1892	#[cfg_attr(test, assert_instr(vpmaxsb))]
1893	#[stable(feature = "simd_x86", since = "1.27.0")]
1894	pub unsafe fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
1895	let a: i8x32 = a.as_i8x32();
1896	let b: i8x32 = b.as_i8x32();
1897	transmute(src:simd_select::<i8x32, _>(m:simd_gt(x:a, y:b), a, b))
1898	}
1899
1900	/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
1901	/// the packed maximum values.
1902	///
1903	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu16)
1904	#[inline]
1905	#[target_feature(enable = "avx2")]
1906	#[cfg_attr(test, assert_instr(vpmaxuw))]
1907	#[stable(feature = "simd_x86", since = "1.27.0")]
1908	pub unsafe fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
1909	let a: u16x16 = a.as_u16x16();
1910	let b: u16x16 = b.as_u16x16();
1911	transmute(src:simd_select::<i16x16, _>(m:simd_gt(x:a, y:b), a, b))
1912	}
1913
1914	/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
1915	/// the packed maximum values.
1916	///
1917	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu32)
1918	#[inline]
1919	#[target_feature(enable = "avx2")]
1920	#[cfg_attr(test, assert_instr(vpmaxud))]
1921	#[stable(feature = "simd_x86", since = "1.27.0")]
1922	pub unsafe fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
1923	let a: u32x8 = a.as_u32x8();
1924	let b: u32x8 = b.as_u32x8();
1925	transmute(src:simd_select::<i32x8, _>(m:simd_gt(x:a, y:b), a, b))
1926	}
1927
1928	/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
1929	/// the packed maximum values.
1930	///
1931	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu8)
1932	#[inline]
1933	#[target_feature(enable = "avx2")]
1934	#[cfg_attr(test, assert_instr(vpmaxub))]
1935	#[stable(feature = "simd_x86", since = "1.27.0")]
1936	pub unsafe fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
1937	let a: u8x32 = a.as_u8x32();
1938	let b: u8x32 = b.as_u8x32();
1939	transmute(src:simd_select::<i8x32, _>(m:simd_gt(x:a, y:b), a, b))
1940	}
1941
1942	/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
1943	/// minimum values.
1944	///
1945	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi16)
1946	#[inline]
1947	#[target_feature(enable = "avx2")]
1948	#[cfg_attr(test, assert_instr(vpminsw))]
1949	#[stable(feature = "simd_x86", since = "1.27.0")]
1950	pub unsafe fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
1951	let a: i16x16 = a.as_i16x16();
1952	let b: i16x16 = b.as_i16x16();
1953	transmute(src:simd_select::<i16x16, _>(m:simd_lt(x:a, y:b), a, b))
1954	}
1955
1956	/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
1957	/// minimum values.
1958	///
1959	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi32)
1960	#[inline]
1961	#[target_feature(enable = "avx2")]
1962	#[cfg_attr(test, assert_instr(vpminsd))]
1963	#[stable(feature = "simd_x86", since = "1.27.0")]
1964	pub unsafe fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
1965	let a: i32x8 = a.as_i32x8();
1966	let b: i32x8 = b.as_i32x8();
1967	transmute(src:simd_select::<i32x8, _>(m:simd_lt(x:a, y:b), a, b))
1968	}
1969
1970	/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
1971	/// minimum values.
1972	///
1973	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi8)
1974	#[inline]
1975	#[target_feature(enable = "avx2")]
1976	#[cfg_attr(test, assert_instr(vpminsb))]
1977	#[stable(feature = "simd_x86", since = "1.27.0")]
1978	pub unsafe fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
1979	let a: i8x32 = a.as_i8x32();
1980	let b: i8x32 = b.as_i8x32();
1981	transmute(src:simd_select::<i8x32, _>(m:simd_lt(x:a, y:b), a, b))
1982	}
1983
1984	/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
1985	/// the packed minimum values.
1986	///
1987	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu16)
1988	#[inline]
1989	#[target_feature(enable = "avx2")]
1990	#[cfg_attr(test, assert_instr(vpminuw))]
1991	#[stable(feature = "simd_x86", since = "1.27.0")]
1992	pub unsafe fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
1993	let a: u16x16 = a.as_u16x16();
1994	let b: u16x16 = b.as_u16x16();
1995	transmute(src:simd_select::<i16x16, _>(m:simd_lt(x:a, y:b), a, b))
1996	}
1997
1998	/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
1999	/// the packed minimum values.
2000	///
2001	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu32)
2002	#[inline]
2003	#[target_feature(enable = "avx2")]
2004	#[cfg_attr(test, assert_instr(vpminud))]
2005	#[stable(feature = "simd_x86", since = "1.27.0")]
2006	pub unsafe fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
2007	let a: u32x8 = a.as_u32x8();
2008	let b: u32x8 = b.as_u32x8();
2009	transmute(src:simd_select::<i32x8, _>(m:simd_lt(x:a, y:b), a, b))
2010	}
2011
2012	/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
2013	/// the packed minimum values.
2014	///
2015	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu8)
2016	#[inline]
2017	#[target_feature(enable = "avx2")]
2018	#[cfg_attr(test, assert_instr(vpminub))]
2019	#[stable(feature = "simd_x86", since = "1.27.0")]
2020	pub unsafe fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
2021	let a: u8x32 = a.as_u8x32();
2022	let b: u8x32 = b.as_u8x32();
2023	transmute(src:simd_select::<i8x32, _>(m:simd_lt(x:a, y:b), a, b))
2024	}
2025
2026	/// Creates mask from the most significant bit of each 8-bit element in `a`,
2027	/// return the result.
2028	///
2029	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_epi8)
2030	#[inline]
2031	#[target_feature(enable = "avx2")]
2032	#[cfg_attr(test, assert_instr(vpmovmskb))]
2033	#[stable(feature = "simd_x86", since = "1.27.0")]
2034	pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 {
2035	let z: i8x32 = i8x32::splat(`0`);
2036	let m: i8x32 = simd_lt(x:a.as_i8x32(), y:z);
2037	simd_bitmask::<_, u32>(m) as i32
2038	}
2039
2040	/// Computes the sum of absolute differences (SADs) of quadruplets of unsigned
2041	/// 8-bit integers in `a` compared to those in `b`, and stores the 16-bit
2042	/// results in dst. Eight SADs are performed for each 128-bit lane using one
2043	/// quadruplet from `b` and eight quadruplets from `a`. One quadruplet is
2044	/// selected from `b` starting at on the offset specified in `imm8`. Eight
2045	/// quadruplets are formed from sequential 8-bit integers selected from `a`
2046	/// starting at the offset specified in `imm8`.
2047	///
2048	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mpsadbw_epu8)
2049	#[inline]
2050	#[target_feature(enable = "avx2")]
2051	#[cfg_attr(test, assert_instr(vmpsadbw, IMM8 = `0`))]
2052	#[rustc_legacy_const_generics(`2`)]
2053	#[stable(feature = "simd_x86", since = "1.27.0")]
2054	pub unsafe fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2055	static_assert_uimm_bits!(IMM8, `8`);
2056	transmute(src:mpsadbw(a:a.as_u8x32(), b:b.as_u8x32(), IMM8))
2057	}
2058
2059	/// Multiplies the low 32-bit integers from each packed 64-bit element in
2060	/// `a` and `b`
2061	///
2062	/// Returns the 64-bit results.
2063	///
2064	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_epi32)
2065	#[inline]
2066	#[target_feature(enable = "avx2")]
2067	#[cfg_attr(test, assert_instr(vpmuldq))]
2068	#[stable(feature = "simd_x86", since = "1.27.0")]
2069	pub unsafe fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
2070	let a: i64x4 = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
2071	let b: i64x4 = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
2072	transmute(src:simd_mul(x:a, y:b))
2073	}
2074
2075	/// Multiplies the low unsigned 32-bit integers from each packed 64-bit
2076	/// element in `a` and `b`
2077	///
2078	/// Returns the unsigned 64-bit results.
2079	///
2080	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_epu32)
2081	#[inline]
2082	#[target_feature(enable = "avx2")]
2083	#[cfg_attr(test, assert_instr(vpmuludq))]
2084	#[stable(feature = "simd_x86", since = "1.27.0")]
2085	pub unsafe fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
2086	let a: u64x4 = a.as_u64x4();
2087	let b: u64x4 = b.as_u64x4();
2088	let mask: u64x4 = u64x4::splat(u32::MAX.into());
2089	transmute(src:simd_mul(x:simd_and(a, mask), y:simd_and(x:b, y:mask)))
2090	}
2091
2092	/// Multiplies the packed 16-bit integers in `a` and `b`, producing
2093	/// intermediate 32-bit integers and returning the high 16 bits of the
2094	/// intermediate integers.
2095	///
2096	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhi_epi16)
2097	#[inline]
2098	#[target_feature(enable = "avx2")]
2099	#[cfg_attr(test, assert_instr(vpmulhw))]
2100	#[stable(feature = "simd_x86", since = "1.27.0")]
2101	pub unsafe fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
2102	let a: i32x16 = simd_cast::<_, i32x16>(a.as_i16x16());
2103	let b: i32x16 = simd_cast::<_, i32x16>(b.as_i16x16());
2104	let r: i32x16 = simd_shr(x:simd_mul(a, b), y:i32x16::splat(`16`));
2105	transmute(src:simd_cast::<i32x16, i16x16>(r))
2106	}
2107
2108	/// Multiplies the packed unsigned 16-bit integers in `a` and `b`, producing
2109	/// intermediate 32-bit integers and returning the high 16 bits of the
2110	/// intermediate integers.
2111	///
2112	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhi_epu16)
2113	#[inline]
2114	#[target_feature(enable = "avx2")]
2115	#[cfg_attr(test, assert_instr(vpmulhuw))]
2116	#[stable(feature = "simd_x86", since = "1.27.0")]
2117	pub unsafe fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
2118	let a: u32x16 = simd_cast::<_, u32x16>(a.as_u16x16());
2119	let b: u32x16 = simd_cast::<_, u32x16>(b.as_u16x16());
2120	let r: u32x16 = simd_shr(x:simd_mul(a, b), y:u32x16::splat(`16`));
2121	transmute(src:simd_cast::<u32x16, u16x16>(r))
2122	}
2123
2124	/// Multiplies the packed 16-bit integers in `a` and `b`, producing
2125	/// intermediate 32-bit integers, and returns the low 16 bits of the
2126	/// intermediate integers
2127	///
2128	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi16)
2129	#[inline]
2130	#[target_feature(enable = "avx2")]
2131	#[cfg_attr(test, assert_instr(vpmullw))]
2132	#[stable(feature = "simd_x86", since = "1.27.0")]
2133	pub unsafe fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
2134	transmute(src:simd_mul(x:a.as_i16x16(), y:b.as_i16x16()))
2135	}
2136
2137	/// Multiplies the packed 32-bit integers in `a` and `b`, producing
2138	/// intermediate 64-bit integers, and returns the low 32 bits of the
2139	/// intermediate integers
2140	///
2141	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi32)
2142	#[inline]
2143	#[target_feature(enable = "avx2")]
2144	#[cfg_attr(test, assert_instr(vpmulld))]
2145	#[stable(feature = "simd_x86", since = "1.27.0")]
2146	pub unsafe fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
2147	transmute(src:simd_mul(x:a.as_i32x8(), y:b.as_i32x8()))
2148	}
2149
2150	/// Multiplies packed 16-bit integers in `a` and `b`, producing
2151	/// intermediate signed 32-bit integers. Truncate each intermediate
2152	/// integer to the 18 most significant bits, round by adding 1, and
2153	/// return bits `[16:1]`.
2154	///
2155	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhrs_epi16)
2156	#[inline]
2157	#[target_feature(enable = "avx2")]
2158	#[cfg_attr(test, assert_instr(vpmulhrsw))]
2159	#[stable(feature = "simd_x86", since = "1.27.0")]
2160	pub unsafe fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
2161	transmute(src:pmulhrsw(a:a.as_i16x16(), b:b.as_i16x16()))
2162	}
2163
2164	/// Computes the bitwise OR of 256 bits (representing integer data) in `a`
2165	/// and `b`
2166	///
2167	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_si256)
2168	#[inline]
2169	#[target_feature(enable = "avx2")]
2170	#[cfg_attr(test, assert_instr(vorps))]
2171	#[stable(feature = "simd_x86", since = "1.27.0")]
2172	pub unsafe fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
2173	transmute(src:simd_or(x:a.as_i32x8(), y:b.as_i32x8()))
2174	}
2175
2176	/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
2177	/// using signed saturation
2178	///
2179	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi16)
2180	#[inline]
2181	#[target_feature(enable = "avx2")]
2182	#[cfg_attr(test, assert_instr(vpacksswb))]
2183	#[stable(feature = "simd_x86", since = "1.27.0")]
2184	pub unsafe fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
2185	transmute(src:packsswb(a:a.as_i16x16(), b:b.as_i16x16()))
2186	}
2187
2188	/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
2189	/// using signed saturation
2190	///
2191	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi32)
2192	#[inline]
2193	#[target_feature(enable = "avx2")]
2194	#[cfg_attr(test, assert_instr(vpackssdw))]
2195	#[stable(feature = "simd_x86", since = "1.27.0")]
2196	pub unsafe fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
2197	transmute(src:packssdw(a:a.as_i32x8(), b:b.as_i32x8()))
2198	}
2199
2200	/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
2201	/// using unsigned saturation
2202	///
2203	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi16)
2204	#[inline]
2205	#[target_feature(enable = "avx2")]
2206	#[cfg_attr(test, assert_instr(vpackuswb))]
2207	#[stable(feature = "simd_x86", since = "1.27.0")]
2208	pub unsafe fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
2209	transmute(src:packuswb(a:a.as_i16x16(), b:b.as_i16x16()))
2210	}
2211
2212	/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
2213	/// using unsigned saturation
2214	///
2215	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi32)
2216	#[inline]
2217	#[target_feature(enable = "avx2")]
2218	#[cfg_attr(test, assert_instr(vpackusdw))]
2219	#[stable(feature = "simd_x86", since = "1.27.0")]
2220	pub unsafe fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
2221	transmute(src:packusdw(a:a.as_i32x8(), b:b.as_i32x8()))
2222	}
2223
2224	/// Permutes packed 32-bit integers from `a` according to the content of `b`.
2225	///
2226	/// The last 3 bits of each integer of `b` are used as addresses into the 8
2227	/// integers of `a`.
2228	///
2229	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_epi32)
2230	#[inline]
2231	#[target_feature(enable = "avx2")]
2232	#[cfg_attr(test, assert_instr(vpermps))]
2233	#[stable(feature = "simd_x86", since = "1.27.0")]
2234	pub unsafe fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
2235	transmute(src:permd(a:a.as_u32x8(), b:b.as_u32x8()))
2236	}
2237
2238	/// Permutes 64-bit integers from `a` using control mask `imm8`.
2239	///
2240	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute4x64_epi64)
2241	#[inline]
2242	#[target_feature(enable = "avx2")]
2243	#[cfg_attr(test, assert_instr(vpermpd, IMM8 = `9`))]
2244	#[rustc_legacy_const_generics(`1`)]
2245	#[stable(feature = "simd_x86", since = "1.27.0")]
2246	pub unsafe fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2247	static_assert_uimm_bits!(IMM8, `8`);
2248	let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
2249	let r: i64x4 = simd_shuffle!(
2250	a.as_i64x4(),
2251	zero,
2252	[
2253	IMM8 as u32 & `0b11`,
2254	(IMM8 as u32 >> `2`) & `0b11`,
2255	(IMM8 as u32 >> `4`) & `0b11`,
2256	(IMM8 as u32 >> `6`) & `0b11`,
2257	],
2258	);
2259	transmute(src:r)
2260	}
2261
2262	/// Shuffles 128-bits of integer data selected by `imm8` from `a` and `b`.
2263	///
2264	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2x128_si256)
2265	#[inline]
2266	#[target_feature(enable = "avx2")]
2267	#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = `9`))]
2268	#[rustc_legacy_const_generics(`2`)]
2269	#[stable(feature = "simd_x86", since = "1.27.0")]
2270	pub unsafe fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2271	static_assert_uimm_bits!(IMM8, `8`);
2272	transmute(src:vperm2i128(a:a.as_i64x4(), b:b.as_i64x4(), IMM8 as i8))
2273	}
2274
2275	/// Shuffles 64-bit floating-point elements in `a` across lanes using the
2276	/// control in `imm8`.
2277	///
2278	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute4x64_pd)
2279	#[inline]
2280	#[target_feature(enable = "avx2")]
2281	#[cfg_attr(test, assert_instr(vpermpd, IMM8 = `1`))]
2282	#[rustc_legacy_const_generics(`1`)]
2283	#[stable(feature = "simd_x86", since = "1.27.0")]
2284	pub unsafe fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
2285	static_assert_uimm_bits!(IMM8, `8`);
2286	simd_shuffle!(
2287	a,
2288	_mm256_undefined_pd(),
2289	[
2290	IMM8 as u32 & `0b11`,
2291	(IMM8 as u32 >> `2`) & `0b11`,
2292	(IMM8 as u32 >> `4`) & `0b11`,
2293	(IMM8 as u32 >> `6`) & `0b11`,
2294	],
2295	)
2296	}
2297
2298	/// Shuffles eight 32-bit floating-point elements in `a` across lanes using
2299	/// the corresponding 32-bit integer index in `idx`.
2300	///
2301	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_ps)
2302	#[inline]
2303	#[target_feature(enable = "avx2")]
2304	#[cfg_attr(test, assert_instr(vpermps))]
2305	#[stable(feature = "simd_x86", since = "1.27.0")]
2306	pub unsafe fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 {
2307	permps(a, b:idx.as_i32x8())
2308	}
2309
2310	/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
2311	/// and `b`, then horizontally sum each consecutive 8 differences to
2312	/// produce four unsigned 16-bit integers, and pack these unsigned 16-bit
2313	/// integers in the low 16 bits of the 64-bit return value
2314	///
2315	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sad_epu8)
2316	#[inline]
2317	#[target_feature(enable = "avx2")]
2318	#[cfg_attr(test, assert_instr(vpsadbw))]
2319	#[stable(feature = "simd_x86", since = "1.27.0")]
2320	pub unsafe fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i {
2321	transmute(src:psadbw(a:a.as_u8x32(), b:b.as_u8x32()))
2322	}
2323
2324	/// Shuffles bytes from `a` according to the content of `b`.
2325	///
2326	/// For each of the 128-bit low and high halves of the vectors, the last
2327	/// 4 bits of each byte of `b` are used as addresses into the respective
2328	/// low or high 16 bytes of `a`. That is, the halves are shuffled separately.
2329	///
2330	/// In addition, if the highest significant bit of a byte of `b` is set, the
2331	/// respective destination byte is set to 0.
2332	///
2333	/// Picturing `a` and `b` as `[u8; 32]`, `_mm256_shuffle_epi8` is logically
2334	/// equivalent to:
2335	///
2336	/// ```
2337	/// fn mm256_shuffle_epi8(a: [u8; `32`], b: [u8; `32`]) -> [u8; `32`] {
2338	/// let mut r = [`0`; `32`];
2339	/// for i in `0`..`16` {
2340	/// // if the most significant bit of b is set,
2341	/// // then the destination byte is set to 0.
2342	/// if b[i] & `0x80` == `0u8` {
2343	/// r[i] = a[(b[i] % `16`) as usize];
2344	/// }
2345	/// if b[i + `16`] & `0x80` == `0u8` {
2346	/// r[i + `16`] = a[(b[i + `16`] % `16` + `16`) as usize];
2347	/// }
2348	/// }
2349	/// r
2350	/// }
2351	/// ```
2352	///
2353	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_epi8)
2354	#[inline]
2355	#[target_feature(enable = "avx2")]
2356	#[cfg_attr(test, assert_instr(vpshufb))]
2357	#[stable(feature = "simd_x86", since = "1.27.0")]
2358	pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
2359	transmute(src:pshufb(a:a.as_u8x32(), b:b.as_u8x32()))
2360	}
2361
2362	/// Shuffles 32-bit integers in 128-bit lanes of `a` using the control in
2363	/// `imm8`.
2364	///
2365	/// ```rust
2366	/// #[cfg(target_arch = "x86")]
2367	/// use std::arch::x86::*;
2368	/// #[cfg(target_arch = "x86_64")]
2369	/// use std::arch::x86_64::*;
2370	///
2371	/// # fn main() {
2372	/// # if is_x86_feature_detected!("avx2") {
2373	/// # #[target_feature(enable = "avx2")]
2374	/// # unsafe fn worker() {
2375	/// let a = _mm256_setr_epi32(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`);
2376	///
2377	/// let c1 = _mm256_shuffle_epi32(a, `0b00_11_10_01`);
2378	/// let c2 = _mm256_shuffle_epi32(a, `0b01_00_10_11`);
2379	///
2380	/// let expected1 = _mm256_setr_epi32(`1`, `2`, `3`, `0`, `5`, `6`, `7`, `4`);
2381	/// let expected2 = _mm256_setr_epi32(`3`, `2`, `0`, `1`, `7`, `6`, `4`, `5`);
2382	///
2383	/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c1, expected1)), !`0`);
2384	/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c2, expected2)), !`0`);
2385	/// # }
2386	/// # unsafe { worker(); }
2387	/// # }
2388	/// # }
2389	/// ```
2390	///
2391	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_epi32)
2392	#[inline]
2393	#[target_feature(enable = "avx2")]
2394	#[cfg_attr(test, assert_instr(vshufps, MASK = `9`))]
2395	#[rustc_legacy_const_generics(`1`)]
2396	#[stable(feature = "simd_x86", since = "1.27.0")]
2397	pub unsafe fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
2398	static_assert_uimm_bits!(MASK, `8`);
2399	let r: i32x8 = simd_shuffle!(
2400	a.as_i32x8(),
2401	a.as_i32x8(),
2402	[
2403	MASK as u32 & `0b11`,
2404	(MASK as u32 >> `2`) & `0b11`,
2405	(MASK as u32 >> `4`) & `0b11`,
2406	(MASK as u32 >> `6`) & `0b11`,
2407	(MASK as u32 & `0b11`) + `4`,
2408	((MASK as u32 >> `2`) & `0b11`) + `4`,
2409	((MASK as u32 >> `4`) & `0b11`) + `4`,
2410	((MASK as u32 >> `6`) & `0b11`) + `4`,
2411	],
2412	);
2413	transmute(src:r)
2414	}
2415
2416	/// Shuffles 16-bit integers in the high 64 bits of 128-bit lanes of `a` using
2417	/// the control in `imm8`. The low 64 bits of 128-bit lanes of `a` are copied
2418	/// to the output.
2419	///
2420	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shufflehi_epi16)
2421	#[inline]
2422	#[target_feature(enable = "avx2")]
2423	#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = `9`))]
2424	#[rustc_legacy_const_generics(`1`)]
2425	#[stable(feature = "simd_x86", since = "1.27.0")]
2426	pub unsafe fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2427	static_assert_uimm_bits!(IMM8, `8`);
2428	let a = a.as_i16x16();
2429	let r: i16x16 = simd_shuffle!(
2430	a,
2431	a,
2432	[
2433	`0`,
2434	`1`,
2435	`2`,
2436	`3`,
2437	`4` + (IMM8 as u32 & `0b11`),
2438	`4` + ((IMM8 as u32 >> `2`) & `0b11`),
2439	`4` + ((IMM8 as u32 >> `4`) & `0b11`),
2440	`4` + ((IMM8 as u32 >> `6`) & `0b11`),
2441	`8`,
2442	`9`,
2443	`10`,
2444	`11`,
2445	`12` + (IMM8 as u32 & `0b11`),
2446	`12` + ((IMM8 as u32 >> `2`) & `0b11`),
2447	`12` + ((IMM8 as u32 >> `4`) & `0b11`),
2448	`12` + ((IMM8 as u32 >> `6`) & `0b11`),
2449	],
2450	);
2451	transmute(r)
2452	}
2453
2454	/// Shuffles 16-bit integers in the low 64 bits of 128-bit lanes of `a` using
2455	/// the control in `imm8`. The high 64 bits of 128-bit lanes of `a` are copied
2456	/// to the output.
2457	///
2458	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shufflelo_epi16)
2459	#[inline]
2460	#[target_feature(enable = "avx2")]
2461	#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = `9`))]
2462	#[rustc_legacy_const_generics(`1`)]
2463	#[stable(feature = "simd_x86", since = "1.27.0")]
2464	pub unsafe fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2465	static_assert_uimm_bits!(IMM8, `8`);
2466	let a = a.as_i16x16();
2467	let r: i16x16 = simd_shuffle!(
2468	a,
2469	a,
2470	[
2471	`0` + (IMM8 as u32 & `0b11`),
2472	`0` + ((IMM8 as u32 >> `2`) & `0b11`),
2473	`0` + ((IMM8 as u32 >> `4`) & `0b11`),
2474	`0` + ((IMM8 as u32 >> `6`) & `0b11`),
2475	`4`,
2476	`5`,
2477	`6`,
2478	`7`,
2479	`8` + (IMM8 as u32 & `0b11`),
2480	`8` + ((IMM8 as u32 >> `2`) & `0b11`),
2481	`8` + ((IMM8 as u32 >> `4`) & `0b11`),
2482	`8` + ((IMM8 as u32 >> `6`) & `0b11`),
2483	`12`,
2484	`13`,
2485	`14`,
2486	`15`,
2487	],
2488	);
2489	transmute(r)
2490	}
2491
2492	/// Negates packed 16-bit integers in `a` when the corresponding signed
2493	/// 16-bit integer in `b` is negative, and returns the results.
2494	/// Results are zeroed out when the corresponding element in `b` is zero.
2495	///
2496	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi16)
2497	#[inline]
2498	#[target_feature(enable = "avx2")]
2499	#[cfg_attr(test, assert_instr(vpsignw))]
2500	#[stable(feature = "simd_x86", since = "1.27.0")]
2501	pub unsafe fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i {
2502	transmute(src:psignw(a:a.as_i16x16(), b:b.as_i16x16()))
2503	}
2504
2505	/// Negates packed 32-bit integers in `a` when the corresponding signed
2506	/// 32-bit integer in `b` is negative, and returns the results.
2507	/// Results are zeroed out when the corresponding element in `b` is zero.
2508	///
2509	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi32)
2510	#[inline]
2511	#[target_feature(enable = "avx2")]
2512	#[cfg_attr(test, assert_instr(vpsignd))]
2513	#[stable(feature = "simd_x86", since = "1.27.0")]
2514	pub unsafe fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i {
2515	transmute(src:psignd(a:a.as_i32x8(), b:b.as_i32x8()))
2516	}
2517
2518	/// Negates packed 8-bit integers in `a` when the corresponding signed
2519	/// 8-bit integer in `b` is negative, and returns the results.
2520	/// Results are zeroed out when the corresponding element in `b` is zero.
2521	///
2522	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi8)
2523	#[inline]
2524	#[target_feature(enable = "avx2")]
2525	#[cfg_attr(test, assert_instr(vpsignb))]
2526	#[stable(feature = "simd_x86", since = "1.27.0")]
2527	pub unsafe fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i {
2528	transmute(src:psignb(a:a.as_i8x32(), b:b.as_i8x32()))
2529	}
2530
2531	/// Shifts packed 16-bit integers in `a` left by `count` while
2532	/// shifting in zeros, and returns the result
2533	///
2534	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi16)
2535	#[inline]
2536	#[target_feature(enable = "avx2")]
2537	#[cfg_attr(test, assert_instr(vpsllw))]
2538	#[stable(feature = "simd_x86", since = "1.27.0")]
2539	pub unsafe fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
2540	transmute(src:psllw(a:a.as_i16x16(), count:count.as_i16x8()))
2541	}
2542
2543	/// Shifts packed 32-bit integers in `a` left by `count` while
2544	/// shifting in zeros, and returns the result
2545	///
2546	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi32)
2547	#[inline]
2548	#[target_feature(enable = "avx2")]
2549	#[cfg_attr(test, assert_instr(vpslld))]
2550	#[stable(feature = "simd_x86", since = "1.27.0")]
2551	pub unsafe fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
2552	transmute(src:pslld(a:a.as_i32x8(), count:count.as_i32x4()))
2553	}
2554
2555	/// Shifts packed 64-bit integers in `a` left by `count` while
2556	/// shifting in zeros, and returns the result
2557	///
2558	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi64)
2559	#[inline]
2560	#[target_feature(enable = "avx2")]
2561	#[cfg_attr(test, assert_instr(vpsllq))]
2562	#[stable(feature = "simd_x86", since = "1.27.0")]
2563	pub unsafe fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
2564	transmute(src:psllq(a:a.as_i64x4(), count:count.as_i64x2()))
2565	}
2566
2567	/// Shifts packed 16-bit integers in `a` left by `IMM8` while
2568	/// shifting in zeros, return the results;
2569	///
2570	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi16)
2571	#[inline]
2572	#[target_feature(enable = "avx2")]
2573	#[cfg_attr(test, assert_instr(vpsllw, IMM8 = `7`))]
2574	#[rustc_legacy_const_generics(`1`)]
2575	#[stable(feature = "simd_x86", since = "1.27.0")]
2576	pub unsafe fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2577	static_assert_uimm_bits!(IMM8, `8`);
2578	if IMM8 >= `16` {
2579	_mm256_setzero_si256()
2580	} else {
2581	transmute(src:simd_shl(x:a.as_u16x16(), y:u16x16::splat(IMM8 as u16)))
2582	}
2583	}
2584
2585	/// Shifts packed 32-bit integers in `a` left by `IMM8` while
2586	/// shifting in zeros, return the results;
2587	///
2588	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi32)
2589	#[inline]
2590	#[target_feature(enable = "avx2")]
2591	#[cfg_attr(test, assert_instr(vpslld, IMM8 = `7`))]
2592	#[rustc_legacy_const_generics(`1`)]
2593	#[stable(feature = "simd_x86", since = "1.27.0")]
2594	pub unsafe fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2595	static_assert_uimm_bits!(IMM8, `8`);
2596	if IMM8 >= `32` {
2597	_mm256_setzero_si256()
2598	} else {
2599	transmute(src:simd_shl(x:a.as_u32x8(), y:u32x8::splat(IMM8 as u32)))
2600	}
2601	}
2602
2603	/// Shifts packed 64-bit integers in `a` left by `IMM8` while
2604	/// shifting in zeros, return the results;
2605	///
2606	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi64)
2607	#[inline]
2608	#[target_feature(enable = "avx2")]
2609	#[cfg_attr(test, assert_instr(vpsllq, IMM8 = `7`))]
2610	#[rustc_legacy_const_generics(`1`)]
2611	#[stable(feature = "simd_x86", since = "1.27.0")]
2612	pub unsafe fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2613	static_assert_uimm_bits!(IMM8, `8`);
2614	if IMM8 >= `64` {
2615	_mm256_setzero_si256()
2616	} else {
2617	transmute(src:simd_shl(x:a.as_u64x4(), y:u64x4::splat(IMM8 as u64)))
2618	}
2619	}
2620
2621	/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
2622	///
2623	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_si256)
2624	#[inline]
2625	#[target_feature(enable = "avx2")]
2626	#[cfg_attr(test, assert_instr(vpslldq, IMM8 = `3`))]
2627	#[rustc_legacy_const_generics(`1`)]
2628	#[stable(feature = "simd_x86", since = "1.27.0")]
2629	pub unsafe fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2630	static_assert_uimm_bits!(IMM8, `8`);
2631	_mm256_bslli_epi128::<IMM8>(a)
2632	}
2633
2634	/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
2635	///
2636	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bslli_epi128)
2637	#[inline]
2638	#[target_feature(enable = "avx2")]
2639	#[cfg_attr(test, assert_instr(vpslldq, IMM8 = `3`))]
2640	#[rustc_legacy_const_generics(`1`)]
2641	#[stable(feature = "simd_x86", since = "1.27.0")]
2642	pub unsafe fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2643	static_assert_uimm_bits!(IMM8, `8`);
2644	const fn mask(shift: i32, i: u32) -> u32 {
2645	let shift = shift as u32 & `0xff`;
2646	if shift > `15` \|\| i % `16` < shift {
2647	`0`
2648	} else {
2649	`32` + (i - shift)
2650	}
2651	}
2652	let a = a.as_i8x32();
2653	let zero = _mm256_setzero_si256().as_i8x32();
2654	let r: i8x32 = simd_shuffle!(
2655	zero,
2656	a,
2657	[
2658	mask(IMM8, `0`),
2659	mask(IMM8, `1`),
2660	mask(IMM8, `2`),
2661	mask(IMM8, `3`),
2662	mask(IMM8, `4`),
2663	mask(IMM8, `5`),
2664	mask(IMM8, `6`),
2665	mask(IMM8, `7`),
2666	mask(IMM8, `8`),
2667	mask(IMM8, `9`),
2668	mask(IMM8, `10`),
2669	mask(IMM8, `11`),
2670	mask(IMM8, `12`),
2671	mask(IMM8, `13`),
2672	mask(IMM8, `14`),
2673	mask(IMM8, `15`),
2674	mask(IMM8, `16`),
2675	mask(IMM8, `17`),
2676	mask(IMM8, `18`),
2677	mask(IMM8, `19`),
2678	mask(IMM8, `20`),
2679	mask(IMM8, `21`),
2680	mask(IMM8, `22`),
2681	mask(IMM8, `23`),
2682	mask(IMM8, `24`),
2683	mask(IMM8, `25`),
2684	mask(IMM8, `26`),
2685	mask(IMM8, `27`),
2686	mask(IMM8, `28`),
2687	mask(IMM8, `29`),
2688	mask(IMM8, `30`),
2689	mask(IMM8, `31`),
2690	],
2691	);
2692	transmute(r)
2693	}
2694
2695	/// Shifts packed 32-bit integers in `a` left by the amount
2696	/// specified by the corresponding element in `count` while
2697	/// shifting in zeros, and returns the result.
2698	///
2699	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi32)
2700	#[inline]
2701	#[target_feature(enable = "avx2")]
2702	#[cfg_attr(test, assert_instr(vpsllvd))]
2703	#[stable(feature = "simd_x86", since = "1.27.0")]
2704	pub unsafe fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
2705	transmute(src:psllvd(a:a.as_i32x4(), count:count.as_i32x4()))
2706	}
2707
2708	/// Shifts packed 32-bit integers in `a` left by the amount
2709	/// specified by the corresponding element in `count` while
2710	/// shifting in zeros, and returns the result.
2711	///
2712	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi32)
2713	#[inline]
2714	#[target_feature(enable = "avx2")]
2715	#[cfg_attr(test, assert_instr(vpsllvd))]
2716	#[stable(feature = "simd_x86", since = "1.27.0")]
2717	pub unsafe fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
2718	transmute(src:psllvd256(a:a.as_i32x8(), count:count.as_i32x8()))
2719	}
2720
2721	/// Shifts packed 64-bit integers in `a` left by the amount
2722	/// specified by the corresponding element in `count` while
2723	/// shifting in zeros, and returns the result.
2724	///
2725	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi64)
2726	#[inline]
2727	#[target_feature(enable = "avx2")]
2728	#[cfg_attr(test, assert_instr(vpsllvq))]
2729	#[stable(feature = "simd_x86", since = "1.27.0")]
2730	pub unsafe fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
2731	transmute(src:psllvq(a:a.as_i64x2(), count:count.as_i64x2()))
2732	}
2733
2734	/// Shifts packed 64-bit integers in `a` left by the amount
2735	/// specified by the corresponding element in `count` while
2736	/// shifting in zeros, and returns the result.
2737	///
2738	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi64)
2739	#[inline]
2740	#[target_feature(enable = "avx2")]
2741	#[cfg_attr(test, assert_instr(vpsllvq))]
2742	#[stable(feature = "simd_x86", since = "1.27.0")]
2743	pub unsafe fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
2744	transmute(src:psllvq256(a:a.as_i64x4(), count:count.as_i64x4()))
2745	}
2746
2747	/// Shifts packed 16-bit integers in `a` right by `count` while
2748	/// shifting in sign bits.
2749	///
2750	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi16)
2751	#[inline]
2752	#[target_feature(enable = "avx2")]
2753	#[cfg_attr(test, assert_instr(vpsraw))]
2754	#[stable(feature = "simd_x86", since = "1.27.0")]
2755	pub unsafe fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
2756	transmute(src:psraw(a:a.as_i16x16(), count:count.as_i16x8()))
2757	}
2758
2759	/// Shifts packed 32-bit integers in `a` right by `count` while
2760	/// shifting in sign bits.
2761	///
2762	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi32)
2763	#[inline]
2764	#[target_feature(enable = "avx2")]
2765	#[cfg_attr(test, assert_instr(vpsrad))]
2766	#[stable(feature = "simd_x86", since = "1.27.0")]
2767	pub unsafe fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
2768	transmute(src:psrad(a:a.as_i32x8(), count:count.as_i32x4()))
2769	}
2770
2771	/// Shifts packed 16-bit integers in `a` right by `IMM8` while
2772	/// shifting in sign bits.
2773	///
2774	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi16)
2775	#[inline]
2776	#[target_feature(enable = "avx2")]
2777	#[cfg_attr(test, assert_instr(vpsraw, IMM8 = `7`))]
2778	#[rustc_legacy_const_generics(`1`)]
2779	#[stable(feature = "simd_x86", since = "1.27.0")]
2780	pub unsafe fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2781	static_assert_uimm_bits!(IMM8, `8`);
2782	transmute(src:simd_shr(x:a.as_i16x16(), y:i16x16::splat(IMM8.min(`15`) as i16)))
2783	}
2784
2785	/// Shifts packed 32-bit integers in `a` right by `IMM8` while
2786	/// shifting in sign bits.
2787	///
2788	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi32)
2789	#[inline]
2790	#[target_feature(enable = "avx2")]
2791	#[cfg_attr(test, assert_instr(vpsrad, IMM8 = `7`))]
2792	#[rustc_legacy_const_generics(`1`)]
2793	#[stable(feature = "simd_x86", since = "1.27.0")]
2794	pub unsafe fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2795	static_assert_uimm_bits!(IMM8, `8`);
2796	transmute(src:simd_shr(x:a.as_i32x8(), y:i32x8::splat(IMM8.min(`31`))))
2797	}
2798
2799	/// Shifts packed 32-bit integers in `a` right by the amount specified by the
2800	/// corresponding element in `count` while shifting in sign bits.
2801	///
2802	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi32)
2803	#[inline]
2804	#[target_feature(enable = "avx2")]
2805	#[cfg_attr(test, assert_instr(vpsravd))]
2806	#[stable(feature = "simd_x86", since = "1.27.0")]
2807	pub unsafe fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
2808	transmute(src:psravd(a:a.as_i32x4(), count:count.as_i32x4()))
2809	}
2810
2811	/// Shifts packed 32-bit integers in `a` right by the amount specified by the
2812	/// corresponding element in `count` while shifting in sign bits.
2813	///
2814	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi32)
2815	#[inline]
2816	#[target_feature(enable = "avx2")]
2817	#[cfg_attr(test, assert_instr(vpsravd))]
2818	#[stable(feature = "simd_x86", since = "1.27.0")]
2819	pub unsafe fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
2820	transmute(src:psravd256(a:a.as_i32x8(), count:count.as_i32x8()))
2821	}
2822
2823	/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
2824	///
2825	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_si256)
2826	#[inline]
2827	#[target_feature(enable = "avx2")]
2828	#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = `1`))]
2829	#[rustc_legacy_const_generics(`1`)]
2830	#[stable(feature = "simd_x86", since = "1.27.0")]
2831	pub unsafe fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2832	static_assert_uimm_bits!(IMM8, `8`);
2833	_mm256_bsrli_epi128::<IMM8>(a)
2834	}
2835
2836	/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
2837	///
2838	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bsrli_epi128)
2839	#[inline]
2840	#[target_feature(enable = "avx2")]
2841	#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = `1`))]
2842	#[rustc_legacy_const_generics(`1`)]
2843	#[stable(feature = "simd_x86", since = "1.27.0")]
2844	pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2845	static_assert_uimm_bits!(IMM8, `8`);
2846	let a = a.as_i8x32();
2847	let zero = _mm256_setzero_si256().as_i8x32();
2848	let r: i8x32 = match IMM8 % `16` {
2849	`0` => simd_shuffle!(
2850	a,
2851	zero,
2852	[
2853	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`,
2854	`23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
2855	],
2856	),
2857	`1` => simd_shuffle!(
2858	a,
2859	zero,
2860	[
2861	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `17`, `18`, `19`, `20`, `21`, `22`, `23`,
2862	`24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
2863	],
2864	),
2865	`2` => simd_shuffle!(
2866	a,
2867	zero,
2868	[
2869	`2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `32`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
2870	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`, `32`,
2871	],
2872	),
2873	`3` => simd_shuffle!(
2874	a,
2875	zero,
2876	[
2877	`3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `32`, `32`, `19`, `20`, `21`, `22`, `23`, `24`,
2878	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`, `32`, `32`,
2879	],
2880	),
2881	`4` => simd_shuffle!(
2882	a,
2883	zero,
2884	[
2885	`4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `32`, `32`, `32`, `20`, `21`, `22`, `23`, `24`, `25`,
2886	`26`, `27`, `28`, `29`, `30`, `31`, `32`, `32`, `32`, `32`,
2887	],
2888	),
2889	`5` => simd_shuffle!(
2890	a,
2891	zero,
2892	[
2893	`5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `32`, `32`, `32`, `32`, `21`, `22`, `23`, `24`, `25`, `26`,
2894	`27`, `28`, `29`, `30`, `31`, `32`, `32`, `32`, `32`, `32`,
2895	],
2896	),
2897	`6` => simd_shuffle!(
2898	a,
2899	zero,
2900	[
2901	`6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `32`, `32`, `32`, `32`, `32`, `22`, `23`, `24`, `25`, `26`, `27`,
2902	`28`, `29`, `30`, `31`, `32`, `32`, `32`, `32`, `32`, `32`,
2903	],
2904	),
2905	`7` => simd_shuffle!(
2906	a,
2907	zero,
2908	[
2909	`7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `23`, `24`, `25`, `26`, `27`,
2910	`28`, `29`, `30`, `31`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
2911	],
2912	),
2913	`8` => simd_shuffle!(
2914	a,
2915	zero,
2916	[
2917	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `24`, `25`, `26`, `27`, `28`,
2918	`29`, `30`, `31`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
2919	],
2920	),
2921	`9` => simd_shuffle!(
2922	a,
2923	zero,
2924	[
2925	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `25`, `26`, `27`, `28`, `29`,
2926	`30`, `31`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
2927	],
2928	),
2929	`10` => simd_shuffle!(
2930	a,
2931	zero,
2932	[
2933	`10`, `11`, `12`, `13`, `14`, `15`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `26`, `27`, `28`, `29`, `30`,
2934	`31`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
2935	],
2936	),
2937	`11` => simd_shuffle!(
2938	a,
2939	zero,
2940	[
2941	`11`, `12`, `13`, `14`, `15`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `27`, `28`, `29`, `30`, `31`,
2942	`32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
2943	],
2944	),
2945	`12` => simd_shuffle!(
2946	a,
2947	zero,
2948	[
2949	`12`, `13`, `14`, `15`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `28`, `29`, `30`, `31`, `32`,
2950	`32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
2951	],
2952	),
2953	`13` => simd_shuffle!(
2954	a,
2955	zero,
2956	[
2957	`13`, `14`, `15`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `29`, `30`, `31`, `32`, `32`,
2958	`32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
2959	],
2960	),
2961	`14` => simd_shuffle!(
2962	a,
2963	zero,
2964	[
2965	`14`, `15`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `30`, `31`, `32`, `32`, `32`,
2966	`32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
2967	],
2968	),
2969	`15` => simd_shuffle!(
2970	a,
2971	zero,
2972	[
2973	`14`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `31`, `32`, `32`, `32`, `32`,
2974	`32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
2975	],
2976	),
2977	_ => zero,
2978	};
2979	transmute(r)
2980	}
2981
2982	/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
2983	/// zeros.
2984	///
2985	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi16)
2986	#[inline]
2987	#[target_feature(enable = "avx2")]
2988	#[cfg_attr(test, assert_instr(vpsrlw))]
2989	#[stable(feature = "simd_x86", since = "1.27.0")]
2990	pub unsafe fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
2991	transmute(src:psrlw(a:a.as_i16x16(), count:count.as_i16x8()))
2992	}
2993
2994	/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
2995	/// zeros.
2996	///
2997	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi32)
2998	#[inline]
2999	#[target_feature(enable = "avx2")]
3000	#[cfg_attr(test, assert_instr(vpsrld))]
3001	#[stable(feature = "simd_x86", since = "1.27.0")]
3002	pub unsafe fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
3003	transmute(src:psrld(a:a.as_i32x8(), count:count.as_i32x4()))
3004	}
3005
3006	/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
3007	/// zeros.
3008	///
3009	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi64)
3010	#[inline]
3011	#[target_feature(enable = "avx2")]
3012	#[cfg_attr(test, assert_instr(vpsrlq))]
3013	#[stable(feature = "simd_x86", since = "1.27.0")]
3014	pub unsafe fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
3015	transmute(src:psrlq(a:a.as_i64x4(), count:count.as_i64x2()))
3016	}
3017
3018	/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
3019	/// zeros
3020	///
3021	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi16)
3022	#[inline]
3023	#[target_feature(enable = "avx2")]
3024	#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = `7`))]
3025	#[rustc_legacy_const_generics(`1`)]
3026	#[stable(feature = "simd_x86", since = "1.27.0")]
3027	pub unsafe fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
3028	static_assert_uimm_bits!(IMM8, `8`);
3029	if IMM8 >= `16` {
3030	_mm256_setzero_si256()
3031	} else {
3032	transmute(src:simd_shr(x:a.as_u16x16(), y:u16x16::splat(IMM8 as u16)))
3033	}
3034	}
3035
3036	/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
3037	/// zeros
3038	///
3039	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi32)
3040	#[inline]
3041	#[target_feature(enable = "avx2")]
3042	#[cfg_attr(test, assert_instr(vpsrld, IMM8 = `7`))]
3043	#[rustc_legacy_const_generics(`1`)]
3044	#[stable(feature = "simd_x86", since = "1.27.0")]
3045	pub unsafe fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
3046	static_assert_uimm_bits!(IMM8, `8`);
3047	if IMM8 >= `32` {
3048	_mm256_setzero_si256()
3049	} else {
3050	transmute(src:simd_shr(x:a.as_u32x8(), y:u32x8::splat(IMM8 as u32)))
3051	}
3052	}
3053
3054	/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
3055	/// zeros
3056	///
3057	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi64)
3058	#[inline]
3059	#[target_feature(enable = "avx2")]
3060	#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = `7`))]
3061	#[rustc_legacy_const_generics(`1`)]
3062	#[stable(feature = "simd_x86", since = "1.27.0")]
3063	pub unsafe fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
3064	static_assert_uimm_bits!(IMM8, `8`);
3065	if IMM8 >= `64` {
3066	_mm256_setzero_si256()
3067	} else {
3068	transmute(src:simd_shr(x:a.as_u64x4(), y:u64x4::splat(IMM8 as u64)))
3069	}
3070	}
3071
3072	/// Shifts packed 32-bit integers in `a` right by the amount specified by
3073	/// the corresponding element in `count` while shifting in zeros,
3074	///
3075	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi32)
3076	#[inline]
3077	#[target_feature(enable = "avx2")]
3078	#[cfg_attr(test, assert_instr(vpsrlvd))]
3079	#[stable(feature = "simd_x86", since = "1.27.0")]
3080	pub unsafe fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
3081	transmute(src:psrlvd(a:a.as_i32x4(), count:count.as_i32x4()))
3082	}
3083
3084	/// Shifts packed 32-bit integers in `a` right by the amount specified by
3085	/// the corresponding element in `count` while shifting in zeros,
3086	///
3087	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi32)
3088	#[inline]
3089	#[target_feature(enable = "avx2")]
3090	#[cfg_attr(test, assert_instr(vpsrlvd))]
3091	#[stable(feature = "simd_x86", since = "1.27.0")]
3092	pub unsafe fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
3093	transmute(src:psrlvd256(a:a.as_i32x8(), count:count.as_i32x8()))
3094	}
3095
3096	/// Shifts packed 64-bit integers in `a` right by the amount specified by
3097	/// the corresponding element in `count` while shifting in zeros,
3098	///
3099	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi64)
3100	#[inline]
3101	#[target_feature(enable = "avx2")]
3102	#[cfg_attr(test, assert_instr(vpsrlvq))]
3103	#[stable(feature = "simd_x86", since = "1.27.0")]
3104	pub unsafe fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
3105	transmute(src:psrlvq(a:a.as_i64x2(), count:count.as_i64x2()))
3106	}
3107
3108	/// Shifts packed 64-bit integers in `a` right by the amount specified by
3109	/// the corresponding element in `count` while shifting in zeros,
3110	///
3111	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi64)
3112	#[inline]
3113	#[target_feature(enable = "avx2")]
3114	#[cfg_attr(test, assert_instr(vpsrlvq))]
3115	#[stable(feature = "simd_x86", since = "1.27.0")]
3116	pub unsafe fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
3117	transmute(src:psrlvq256(a:a.as_i64x4(), count:count.as_i64x4()))
3118	}
3119
// TODO _mm256_stream_load_si256 (__m256i const* mem_addr)
3121
3122	/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
3123	///
3124	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi16)
3125	#[inline]
3126	#[target_feature(enable = "avx2")]
3127	#[cfg_attr(test, assert_instr(vpsubw))]
3128	#[stable(feature = "simd_x86", since = "1.27.0")]
3129	pub unsafe fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
3130	transmute(src:simd_sub(x:a.as_i16x16(), y:b.as_i16x16()))
3131	}
3132
3133	/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`
3134	///
3135	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi32)
3136	#[inline]
3137	#[target_feature(enable = "avx2")]
3138	#[cfg_attr(test, assert_instr(vpsubd))]
3139	#[stable(feature = "simd_x86", since = "1.27.0")]
3140	pub unsafe fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
3141	transmute(src:simd_sub(x:a.as_i32x8(), y:b.as_i32x8()))
3142	}
3143
3144	/// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`
3145	///
3146	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi64)
3147	#[inline]
3148	#[target_feature(enable = "avx2")]
3149	#[cfg_attr(test, assert_instr(vpsubq))]
3150	#[stable(feature = "simd_x86", since = "1.27.0")]
3151	pub unsafe fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
3152	transmute(src:simd_sub(x:a.as_i64x4(), y:b.as_i64x4()))
3153	}
3154
3155	/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
3156	///
3157	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi8)
3158	#[inline]
3159	#[target_feature(enable = "avx2")]
3160	#[cfg_attr(test, assert_instr(vpsubb))]
3161	#[stable(feature = "simd_x86", since = "1.27.0")]
3162	pub unsafe fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
3163	transmute(src:simd_sub(x:a.as_i8x32(), y:b.as_i8x32()))
3164	}
3165
3166	/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in
3167	/// `a` using saturation.
3168	///
3169	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epi16)
3170	#[inline]
3171	#[target_feature(enable = "avx2")]
3172	#[cfg_attr(test, assert_instr(vpsubsw))]
3173	#[stable(feature = "simd_x86", since = "1.27.0")]
3174	pub unsafe fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
3175	transmute(src:simd_saturating_sub(x:a.as_i16x16(), y:b.as_i16x16()))
3176	}
3177
3178	/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in
3179	/// `a` using saturation.
3180	///
3181	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epi8)
3182	#[inline]
3183	#[target_feature(enable = "avx2")]
3184	#[cfg_attr(test, assert_instr(vpsubsb))]
3185	#[stable(feature = "simd_x86", since = "1.27.0")]
3186	pub unsafe fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
3187	transmute(src:simd_saturating_sub(x:a.as_i8x32(), y:b.as_i8x32()))
3188	}
3189
3190	/// Subtract packed unsigned 16-bit integers in `b` from packed 16-bit
3191	/// integers in `a` using saturation.
3192	///
3193	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epu16)
3194	#[inline]
3195	#[target_feature(enable = "avx2")]
3196	#[cfg_attr(test, assert_instr(vpsubusw))]
3197	#[stable(feature = "simd_x86", since = "1.27.0")]
3198	pub unsafe fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
3199	transmute(src:simd_saturating_sub(x:a.as_u16x16(), y:b.as_u16x16()))
3200	}
3201
3202	/// Subtract packed unsigned 8-bit integers in `b` from packed 8-bit
3203	/// integers in `a` using saturation.
3204	///
3205	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epu8)
3206	#[inline]
3207	#[target_feature(enable = "avx2")]
3208	#[cfg_attr(test, assert_instr(vpsubusb))]
3209	#[stable(feature = "simd_x86", since = "1.27.0")]
3210	pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
3211	transmute(src:simd_saturating_sub(x:a.as_u8x32(), y:b.as_u8x32()))
3212	}
3213
3214	/// Unpacks and interleave 8-bit integers from the high half of each
3215	/// 128-bit lane in `a` and `b`.
3216	///
3217	/// ```rust
3218	/// #[cfg(target_arch = "x86")]
3219	/// use std::arch::x86::*;
3220	/// #[cfg(target_arch = "x86_64")]
3221	/// use std::arch::x86_64::*;
3222	///
3223	/// # fn main() {
3224	/// # if is_x86_feature_detected!("avx2") {
3225	/// # #[target_feature(enable = "avx2")]
3226	/// # unsafe fn worker() {
3227	/// let a = _mm256_setr_epi8(
3228	/// `0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`,
3229	/// `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
3230	/// );
3231	/// let b = _mm256_setr_epi8(
3232	/// `0`, `-1`, `-2`, `-3`, `-4`, `-5`, `-6`, `-7`, `-8`, `-9`, `-10`, `-11`, `-12`, `-13`, `-14`, `-15`,
3233	/// `-16`, `-17`, `-18`, `-19`, `-20`, `-21`, `-22`, `-23`, `-24`, `-25`, `-26`, `-27`, `-28`, `-29`,
3234	/// `-30`, `-31`,
3235	/// );
3236	///
3237	/// let c = _mm256_unpackhi_epi8(a, b);
3238	///
3239	/// let expected = _mm256_setr_epi8(
3240	/// `8`, `-8`, `9`, `-9`, `10`, `-10`, `11`, `-11`, `12`, `-12`, `13`, `-13`, `14`, `-14`, `15`, `-15`,
3241	/// `24`, `-24`, `25`, `-25`, `26`, `-26`, `27`, `-27`, `28`, `-28`, `29`, `-29`, `30`, `-30`, `31`,
3242	/// `-31`,
3243	/// );
3244	/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !`0`);
3245	///
3246	/// # }
3247	/// # unsafe { worker(); }
3248	/// # }
3249	/// # }
3250	/// ```
3251	///
3252	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi8)
3253	#[inline]
3254	#[target_feature(enable = "avx2")]
3255	#[cfg_attr(test, assert_instr(vpunpckhbw))]
3256	#[stable(feature = "simd_x86", since = "1.27.0")]
3257	pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
3258	#[rustfmt::skip]
3259	let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3260	`8`, `40`, `9`, `41`, `10`, `42`, `11`, `43`,
3261	`12`, `44`, `13`, `45`, `14`, `46`, `15`, `47`,
3262	`24`, `56`, `25`, `57`, `26`, `58`, `27`, `59`,
3263	`28`, `60`, `29`, `61`, `30`, `62`, `31`, `63`,
3264	]);
3265	transmute(src:r)
3266	}
3267
3268	/// Unpacks and interleave 8-bit integers from the low half of each
3269	/// 128-bit lane of `a` and `b`.
3270	///
3271	/// ```rust
3272	/// #[cfg(target_arch = "x86")]
3273	/// use std::arch::x86::*;
3274	/// #[cfg(target_arch = "x86_64")]
3275	/// use std::arch::x86_64::*;
3276	///
3277	/// # fn main() {
3278	/// # if is_x86_feature_detected!("avx2") {
3279	/// # #[target_feature(enable = "avx2")]
3280	/// # unsafe fn worker() {
3281	/// let a = _mm256_setr_epi8(
3282	/// `0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`,
3283	/// `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
3284	/// );
3285	/// let b = _mm256_setr_epi8(
3286	/// `0`, `-1`, `-2`, `-3`, `-4`, `-5`, `-6`, `-7`, `-8`, `-9`, `-10`, `-11`, `-12`, `-13`, `-14`, `-15`,
3287	/// `-16`, `-17`, `-18`, `-19`, `-20`, `-21`, `-22`, `-23`, `-24`, `-25`, `-26`, `-27`, `-28`, `-29`,
3288	/// `-30`, `-31`,
3289	/// );
3290	///
3291	/// let c = _mm256_unpacklo_epi8(a, b);
3292	///
3293	/// let expected = _mm256_setr_epi8(
3294	/// `0`, `0`, `1`, `-1`, `2`, `-2`, `3`, `-3`, `4`, `-4`, `5`, `-5`, `6`, `-6`, `7`, `-7`, `16`, `-16`, `17`,
3295	/// `-17`, `18`, `-18`, `19`, `-19`, `20`, `-20`, `21`, `-21`, `22`, `-22`, `23`, `-23`,
3296	/// );
3297	/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !`0`);
3298	///
3299	/// # }
3300	/// # unsafe { worker(); }
3301	/// # }
3302	/// # }
3303	/// ```
3304	///
3305	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi8)
3306	#[inline]
3307	#[target_feature(enable = "avx2")]
3308	#[cfg_attr(test, assert_instr(vpunpcklbw))]
3309	#[stable(feature = "simd_x86", since = "1.27.0")]
3310	pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
3311	#[rustfmt::skip]
3312	let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3313	`0`, `32`, `1`, `33`, `2`, `34`, `3`, `35`,
3314	`4`, `36`, `5`, `37`, `6`, `38`, `7`, `39`,
3315	`16`, `48`, `17`, `49`, `18`, `50`, `19`, `51`,
3316	`20`, `52`, `21`, `53`, `22`, `54`, `23`, `55`,
3317	]);
3318	transmute(src:r)
3319	}
3320
3321	/// Unpacks and interleave 16-bit integers from the high half of each
3322	/// 128-bit lane of `a` and `b`.
3323	///
3324	/// ```rust
3325	/// #[cfg(target_arch = "x86")]
3326	/// use std::arch::x86::*;
3327	/// #[cfg(target_arch = "x86_64")]
3328	/// use std::arch::x86_64::*;
3329	///
3330	/// # fn main() {
3331	/// # if is_x86_feature_detected!("avx2") {
3332	/// # #[target_feature(enable = "avx2")]
3333	/// # unsafe fn worker() {
3334	/// let a = _mm256_setr_epi16(
3335	/// `0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
3336	/// );
3337	/// let b = _mm256_setr_epi16(
3338	/// `0`, `-1`, `-2`, `-3`, `-4`, `-5`, `-6`, `-7`, `-8`, `-9`, `-10`, `-11`, `-12`, `-13`, `-14`, `-15`,
3339	/// );
3340	///
3341	/// let c = _mm256_unpackhi_epi16(a, b);
3342	///
3343	/// let expected = _mm256_setr_epi16(
3344	/// `4`, `-4`, `5`, `-5`, `6`, `-6`, `7`, `-7`, `12`, `-12`, `13`, `-13`, `14`, `-14`, `15`, `-15`,
3345	/// );
3346	/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !`0`);
3347	///
3348	/// # }
3349	/// # unsafe { worker(); }
3350	/// # }
3351	/// # }
3352	/// ```
3353	///
3354	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi16)
3355	#[inline]
3356	#[target_feature(enable = "avx2")]
3357	#[cfg_attr(test, assert_instr(vpunpckhwd))]
3358	#[stable(feature = "simd_x86", since = "1.27.0")]
3359	pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
3360	let r: i16x16 = simd_shuffle!(
3361	a.as_i16x16(),
3362	b.as_i16x16(),
3363	[`4`, `20`, `5`, `21`, `6`, `22`, `7`, `23`, `12`, `28`, `13`, `29`, `14`, `30`, `15`, `31`],
3364	);
3365	transmute(src:r)
3366	}
3367
3368	/// Unpacks and interleave 16-bit integers from the low half of each
3369	/// 128-bit lane of `a` and `b`.
3370	///
3371	/// ```rust
3372	/// #[cfg(target_arch = "x86")]
3373	/// use std::arch::x86::*;
3374	/// #[cfg(target_arch = "x86_64")]
3375	/// use std::arch::x86_64::*;
3376	///
3377	/// # fn main() {
3378	/// # if is_x86_feature_detected!("avx2") {
3379	/// # #[target_feature(enable = "avx2")]
3380	/// # unsafe fn worker() {
3381	///
3382	/// let a = _mm256_setr_epi16(
3383	/// `0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
3384	/// );
3385	/// let b = _mm256_setr_epi16(
3386	/// `0`, `-1`, `-2`, `-3`, `-4`, `-5`, `-6`, `-7`, `-8`, `-9`, `-10`, `-11`, `-12`, `-13`, `-14`, `-15`,
3387	/// );
3388	///
3389	/// let c = _mm256_unpacklo_epi16(a, b);
3390	///
3391	/// let expected = _mm256_setr_epi16(
3392	/// `0`, `0`, `1`, `-1`, `2`, `-2`, `3`, `-3`, `8`, `-8`, `9`, `-9`, `10`, `-10`, `11`, `-11`,
3393	/// );
3394	/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !`0`);
3395	///
3396	/// # }
3397	/// # unsafe { worker(); }
3398	/// # }
3399	/// # }
3400	/// ```
3401	///
3402	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi16)
3403	#[inline]
3404	#[target_feature(enable = "avx2")]
3405	#[cfg_attr(test, assert_instr(vpunpcklwd))]
3406	#[stable(feature = "simd_x86", since = "1.27.0")]
3407	pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
3408	let r: i16x16 = simd_shuffle!(
3409	a.as_i16x16(),
3410	b.as_i16x16(),
3411	[`0`, `16`, `1`, `17`, `2`, `18`, `3`, `19`, `8`, `24`, `9`, `25`, `10`, `26`, `11`, `27`],
3412	);
3413	transmute(src:r)
3414	}
3415
3416	/// Unpacks and interleave 32-bit integers from the high half of each
3417	/// 128-bit lane of `a` and `b`.
3418	///
3419	/// ```rust
3420	/// #[cfg(target_arch = "x86")]
3421	/// use std::arch::x86::*;
3422	/// #[cfg(target_arch = "x86_64")]
3423	/// use std::arch::x86_64::*;
3424	///
3425	/// # fn main() {
3426	/// # if is_x86_feature_detected!("avx2") {
3427	/// # #[target_feature(enable = "avx2")]
3428	/// # unsafe fn worker() {
3429	/// let a = _mm256_setr_epi32(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`);
3430	/// let b = _mm256_setr_epi32(`0`, `-1`, `-2`, `-3`, `-4`, `-5`, `-6`, `-7`);
3431	///
3432	/// let c = _mm256_unpackhi_epi32(a, b);
3433	///
3434	/// let expected = _mm256_setr_epi32(`2`, `-2`, `3`, `-3`, `6`, `-6`, `7`, `-7`);
3435	/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !`0`);
3436	///
3437	/// # }
3438	/// # unsafe { worker(); }
3439	/// # }
3440	/// # }
3441	/// ```
3442	///
3443	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi32)
3444	#[inline]
3445	#[target_feature(enable = "avx2")]
3446	#[cfg_attr(test, assert_instr(vunpckhps))]
3447	#[stable(feature = "simd_x86", since = "1.27.0")]
3448	pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
3449	let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [`2`, `10`, `3`, `11`, `6`, `14`, `7`, `15`]);
3450	transmute(src:r)
3451	}
3452
3453	/// Unpacks and interleave 32-bit integers from the low half of each
3454	/// 128-bit lane of `a` and `b`.
3455	///
3456	/// ```rust
3457	/// #[cfg(target_arch = "x86")]
3458	/// use std::arch::x86::*;
3459	/// #[cfg(target_arch = "x86_64")]
3460	/// use std::arch::x86_64::*;
3461	///
3462	/// # fn main() {
3463	/// # if is_x86_feature_detected!("avx2") {
3464	/// # #[target_feature(enable = "avx2")]
3465	/// # unsafe fn worker() {
3466	/// let a = _mm256_setr_epi32(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`);
3467	/// let b = _mm256_setr_epi32(`0`, `-1`, `-2`, `-3`, `-4`, `-5`, `-6`, `-7`);
3468	///
3469	/// let c = _mm256_unpacklo_epi32(a, b);
3470	///
3471	/// let expected = _mm256_setr_epi32(`0`, `0`, `1`, `-1`, `4`, `-4`, `5`, `-5`);
3472	/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !`0`);
3473	///
3474	/// # }
3475	/// # unsafe { worker(); }
3476	/// # }
3477	/// # }
3478	/// ```
3479	///
3480	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi32)
3481	#[inline]
3482	#[target_feature(enable = "avx2")]
3483	#[cfg_attr(test, assert_instr(vunpcklps))]
3484	#[stable(feature = "simd_x86", since = "1.27.0")]
3485	pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
3486	let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [`0`, `8`, `1`, `9`, `4`, `12`, `5`, `13`]);
3487	transmute(src:r)
3488	}
3489
3490	/// Unpacks and interleave 64-bit integers from the high half of each
3491	/// 128-bit lane of `a` and `b`.
3492	///
3493	/// ```rust
3494	/// #[cfg(target_arch = "x86")]
3495	/// use std::arch::x86::*;
3496	/// #[cfg(target_arch = "x86_64")]
3497	/// use std::arch::x86_64::*;
3498	///
3499	/// # fn main() {
3500	/// # if is_x86_feature_detected!("avx2") {
3501	/// # #[target_feature(enable = "avx2")]
3502	/// # unsafe fn worker() {
3503	/// let a = _mm256_setr_epi64x(`0`, `1`, `2`, `3`);
3504	/// let b = _mm256_setr_epi64x(`0`, `-1`, `-2`, `-3`);
3505	///
3506	/// let c = _mm256_unpackhi_epi64(a, b);
3507	///
3508	/// let expected = _mm256_setr_epi64x(`1`, `-1`, `3`, `-3`);
3509	/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !`0`);
3510	///
3511	/// # }
3512	/// # unsafe { worker(); }
3513	/// # }
3514	/// # }
3515	/// ```
3516	///
3517	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi64)
3518	#[inline]
3519	#[target_feature(enable = "avx2")]
3520	#[cfg_attr(test, assert_instr(vunpckhpd))]
3521	#[stable(feature = "simd_x86", since = "1.27.0")]
3522	pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
3523	let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [`1`, `5`, `3`, `7`]);
3524	transmute(src:r)
3525	}
3526
3527	/// Unpacks and interleave 64-bit integers from the low half of each
3528	/// 128-bit lane of `a` and `b`.
3529	///
3530	/// ```rust
3531	/// #[cfg(target_arch = "x86")]
3532	/// use std::arch::x86::*;
3533	/// #[cfg(target_arch = "x86_64")]
3534	/// use std::arch::x86_64::*;
3535	///
3536	/// # fn main() {
3537	/// # if is_x86_feature_detected!("avx2") {
3538	/// # #[target_feature(enable = "avx2")]
3539	/// # unsafe fn worker() {
3540	/// let a = _mm256_setr_epi64x(`0`, `1`, `2`, `3`);
3541	/// let b = _mm256_setr_epi64x(`0`, `-1`, `-2`, `-3`);
3542	///
3543	/// let c = _mm256_unpacklo_epi64(a, b);
3544	///
3545	/// let expected = _mm256_setr_epi64x(`0`, `0`, `2`, `-2`);
3546	/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !`0`);
3547	///
3548	/// # }
3549	/// # unsafe { worker(); }
3550	/// # }
3551	/// # }
3552	/// ```
3553	///
3554	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi64)
3555	#[inline]
3556	#[target_feature(enable = "avx2")]
3557	#[cfg_attr(test, assert_instr(vunpcklpd))]
3558	#[stable(feature = "simd_x86", since = "1.27.0")]
3559	pub unsafe fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
3560	let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [`0`, `4`, `2`, `6`]);
3561	transmute(src:r)
3562	}
3563
3564	/// Computes the bitwise XOR of 256 bits (representing integer data)
3565	/// in `a` and `b`
3566	///
3567	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_si256)
3568	#[inline]
3569	#[target_feature(enable = "avx2")]
3570	#[cfg_attr(test, assert_instr(vxorps))]
3571	#[stable(feature = "simd_x86", since = "1.27.0")]
3572	pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
3573	transmute(src:simd_xor(x:a.as_i64x4(), y:b.as_i64x4()))
3574	}
3575
3576	/// Extracts an 8-bit integer from `a`, selected with `INDEX`. Returns a 32-bit
3577	/// integer containing the zero-extended integer data.
3578	///
3579	/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
3580	///
3581	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi8)
3582	#[inline]
3583	#[target_feature(enable = "avx2")]
3584	// This intrinsic has no corresponding instruction.
3585	#[rustc_legacy_const_generics(`1`)]
3586	#[stable(feature = "simd_x86", since = "1.27.0")]
3587	pub unsafe fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
3588	static_assert_uimm_bits!(INDEX, `5`);
3589	simd_extract::<_, u8>(x:a.as_u8x32(), INDEX as u32) as i32
3590	}
3591
3592	/// Extracts a 16-bit integer from `a`, selected with `INDEX`. Returns a 32-bit
3593	/// integer containing the zero-extended integer data.
3594	///
3595	/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
3596	///
3597	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi16)
3598	#[inline]
3599	#[target_feature(enable = "avx2")]
3600	// This intrinsic has no corresponding instruction.
3601	#[rustc_legacy_const_generics(`1`)]
3602	#[stable(feature = "simd_x86", since = "1.27.0")]
3603	pub unsafe fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
3604	static_assert_uimm_bits!(INDEX, `4`);
3605	simd_extract::<_, u16>(x:a.as_u16x16(), INDEX as u32) as i32
3606	}
3607
3608	/// Extracts a 32-bit integer from `a`, selected with `INDEX`.
3609	///
3610	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi32)
3611	#[inline]
3612	#[target_feature(enable = "avx2")]
3613	// This intrinsic has no corresponding instruction.
3614	#[rustc_legacy_const_generics(`1`)]
3615	#[stable(feature = "simd_x86", since = "1.27.0")]
3616	pub unsafe fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 {
3617	static_assert_uimm_bits!(INDEX, `3`);
3618	simd_extract(x:a.as_i32x8(), INDEX as u32)
3619	}
3620
3621	/// Returns the first element of the input vector of `[4 x double]`.
3622	///
3623	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsd_f64)
3624	#[inline]
3625	#[target_feature(enable = "avx2")]
3626	//#[cfg_attr(test, assert_instr(movsd))] FIXME
3627	#[stable(feature = "simd_x86", since = "1.27.0")]
3628	pub unsafe fn _mm256_cvtsd_f64(a: __m256d) -> f64 {
3629	simd_extract(x:a, idx:`0`)
3630	}
3631
3632	/// Returns the first element of the input vector of `[8 x i32]`.
3633	///
3634	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsi256_si32)
3635	#[inline]
3636	#[target_feature(enable = "avx2")]
3637	#[stable(feature = "simd_x86", since = "1.27.0")]
3638	pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
3639	simd_extract(x:a.as_i32x8(), idx:`0`)
3640	}
3641
3642	#[allow(improper_ctypes)]
3643	extern "C" {
3644	#[link_name = "llvm.x86.avx2.pabs.b"]
3645	fn pabsb(a: i8x32) -> u8x32;
3646	#[link_name = "llvm.x86.avx2.pabs.w"]
3647	fn pabsw(a: i16x16) -> u16x16;
3648	#[link_name = "llvm.x86.avx2.pabs.d"]
3649	fn pabsd(a: i32x8) -> u32x8;
3650	#[link_name = "llvm.x86.avx2.phadd.w"]
3651	fn phaddw(a: i16x16, b: i16x16) -> i16x16;
3652	#[link_name = "llvm.x86.avx2.phadd.d"]
3653	fn phaddd(a: i32x8, b: i32x8) -> i32x8;
3654	#[link_name = "llvm.x86.avx2.phadd.sw"]
3655	fn phaddsw(a: i16x16, b: i16x16) -> i16x16;
3656	#[link_name = "llvm.x86.avx2.phsub.w"]
3657	fn phsubw(a: i16x16, b: i16x16) -> i16x16;
3658	#[link_name = "llvm.x86.avx2.phsub.d"]
3659	fn phsubd(a: i32x8, b: i32x8) -> i32x8;
3660	#[link_name = "llvm.x86.avx2.phsub.sw"]
3661	fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
3662	#[link_name = "llvm.x86.avx2.pmadd.wd"]
3663	fn pmaddwd(a: i16x16, b: i16x16) -> i32x8;
3664	#[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
3665	fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16;
3666	#[link_name = "llvm.x86.avx2.maskload.d"]
3667	fn maskloadd(mem_addr: *const i8, mask: i32x4) -> i32x4;
3668	#[link_name = "llvm.x86.avx2.maskload.d.256"]
3669	fn maskloadd256(mem_addr: *const i8, mask: i32x8) -> i32x8;
3670	#[link_name = "llvm.x86.avx2.maskload.q"]
3671	fn maskloadq(mem_addr: *const i8, mask: i64x2) -> i64x2;
3672	#[link_name = "llvm.x86.avx2.maskload.q.256"]
3673	fn maskloadq256(mem_addr: *const i8, mask: i64x4) -> i64x4;
3674	#[link_name = "llvm.x86.avx2.maskstore.d"]
3675	fn maskstored(mem_addr: *mut i8, mask: i32x4, a: i32x4);
3676	#[link_name = "llvm.x86.avx2.maskstore.d.256"]
3677	fn maskstored256(mem_addr: *mut i8, mask: i32x8, a: i32x8);
3678	#[link_name = "llvm.x86.avx2.maskstore.q"]
3679	fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2);
3680	#[link_name = "llvm.x86.avx2.maskstore.q.256"]
3681	fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
3682	#[link_name = "llvm.x86.avx2.mpsadbw"]
3683	fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
3684	#[link_name = "llvm.x86.avx2.pmul.hr.sw"]
3685	fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
3686	#[link_name = "llvm.x86.avx2.packsswb"]
3687	fn packsswb(a: i16x16, b: i16x16) -> i8x32;
3688	#[link_name = "llvm.x86.avx2.packssdw"]
3689	fn packssdw(a: i32x8, b: i32x8) -> i16x16;
3690	#[link_name = "llvm.x86.avx2.packuswb"]
3691	fn packuswb(a: i16x16, b: i16x16) -> u8x32;
3692	#[link_name = "llvm.x86.avx2.packusdw"]
3693	fn packusdw(a: i32x8, b: i32x8) -> u16x16;
3694	#[link_name = "llvm.x86.avx2.psad.bw"]
3695	fn psadbw(a: u8x32, b: u8x32) -> u64x4;
3696	#[link_name = "llvm.x86.avx2.psign.b"]
3697	fn psignb(a: i8x32, b: i8x32) -> i8x32;
3698	#[link_name = "llvm.x86.avx2.psign.w"]
3699	fn psignw(a: i16x16, b: i16x16) -> i16x16;
3700	#[link_name = "llvm.x86.avx2.psign.d"]
3701	fn psignd(a: i32x8, b: i32x8) -> i32x8;
3702	#[link_name = "llvm.x86.avx2.psll.w"]
3703	fn psllw(a: i16x16, count: i16x8) -> i16x16;
3704	#[link_name = "llvm.x86.avx2.psll.d"]
3705	fn pslld(a: i32x8, count: i32x4) -> i32x8;
3706	#[link_name = "llvm.x86.avx2.psll.q"]
3707	fn psllq(a: i64x4, count: i64x2) -> i64x4;
3708	#[link_name = "llvm.x86.avx2.psllv.d"]
3709	fn psllvd(a: i32x4, count: i32x4) -> i32x4;
3710	#[link_name = "llvm.x86.avx2.psllv.d.256"]
3711	fn psllvd256(a: i32x8, count: i32x8) -> i32x8;
3712	#[link_name = "llvm.x86.avx2.psllv.q"]
3713	fn psllvq(a: i64x2, count: i64x2) -> i64x2;
3714	#[link_name = "llvm.x86.avx2.psllv.q.256"]
3715	fn psllvq256(a: i64x4, count: i64x4) -> i64x4;
3716	#[link_name = "llvm.x86.avx2.psra.w"]
3717	fn psraw(a: i16x16, count: i16x8) -> i16x16;
3718	#[link_name = "llvm.x86.avx2.psra.d"]
3719	fn psrad(a: i32x8, count: i32x4) -> i32x8;
3720	#[link_name = "llvm.x86.avx2.psrav.d"]
3721	fn psravd(a: i32x4, count: i32x4) -> i32x4;
3722	#[link_name = "llvm.x86.avx2.psrav.d.256"]
3723	fn psravd256(a: i32x8, count: i32x8) -> i32x8;
3724	#[link_name = "llvm.x86.avx2.psrl.w"]
3725	fn psrlw(a: i16x16, count: i16x8) -> i16x16;
3726	#[link_name = "llvm.x86.avx2.psrl.d"]
3727	fn psrld(a: i32x8, count: i32x4) -> i32x8;
3728	#[link_name = "llvm.x86.avx2.psrl.q"]
3729	fn psrlq(a: i64x4, count: i64x2) -> i64x4;
3730	#[link_name = "llvm.x86.avx2.psrlv.d"]
3731	fn psrlvd(a: i32x4, count: i32x4) -> i32x4;
3732	#[link_name = "llvm.x86.avx2.psrlv.d.256"]
3733	fn psrlvd256(a: i32x8, count: i32x8) -> i32x8;
3734	#[link_name = "llvm.x86.avx2.psrlv.q"]
3735	fn psrlvq(a: i64x2, count: i64x2) -> i64x2;
3736	#[link_name = "llvm.x86.avx2.psrlv.q.256"]
3737	fn psrlvq256(a: i64x4, count: i64x4) -> i64x4;
3738	#[link_name = "llvm.x86.avx2.pshuf.b"]
3739	fn pshufb(a: u8x32, b: u8x32) -> u8x32;
3740	#[link_name = "llvm.x86.avx2.permd"]
3741	fn permd(a: u32x8, b: u32x8) -> u32x8;
3742	#[link_name = "llvm.x86.avx2.permps"]
3743	fn permps(a: __m256, b: i32x8) -> __m256;
3744	#[link_name = "llvm.x86.avx2.vperm2i128"]
3745	fn vperm2i128(a: i64x4, b: i64x4, imm8: i8) -> i64x4;
3746	#[link_name = "llvm.x86.avx2.gather.d.d"]
3747	fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4;
3748	#[link_name = "llvm.x86.avx2.gather.d.d.256"]
3749	fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8;
3750	#[link_name = "llvm.x86.avx2.gather.d.q"]
3751	fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2;
3752	#[link_name = "llvm.x86.avx2.gather.d.q.256"]
3753	fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4;
3754	#[link_name = "llvm.x86.avx2.gather.q.d"]
3755	fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4;
3756	#[link_name = "llvm.x86.avx2.gather.q.d.256"]
3757	fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4;
3758	#[link_name = "llvm.x86.avx2.gather.q.q"]
3759	fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2;
3760	#[link_name = "llvm.x86.avx2.gather.q.q.256"]
3761	fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4;
3762	#[link_name = "llvm.x86.avx2.gather.d.pd"]
3763	fn pgatherdpd(
3764	src: __m128d,
3765	slice: *const i8,
3766	offsets: i32x4,
3767	mask: __m128d,
3768	scale: i8,
3769	) -> __m128d;
3770	#[link_name = "llvm.x86.avx2.gather.d.pd.256"]
3771	fn vpgatherdpd(
3772	src: __m256d,
3773	slice: *const i8,
3774	offsets: i32x4,
3775	mask: __m256d,
3776	scale: i8,
3777	) -> __m256d;
3778	#[link_name = "llvm.x86.avx2.gather.q.pd"]
3779	fn pgatherqpd(
3780	src: __m128d,
3781	slice: *const i8,
3782	offsets: i64x2,
3783	mask: __m128d,
3784	scale: i8,
3785	) -> __m128d;
3786	#[link_name = "llvm.x86.avx2.gather.q.pd.256"]
3787	fn vpgatherqpd(
3788	src: __m256d,
3789	slice: *const i8,
3790	offsets: i64x4,
3791	mask: __m256d,
3792	scale: i8,
3793	) -> __m256d;
3794	#[link_name = "llvm.x86.avx2.gather.d.ps"]
3795	fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8)
3796	-> __m128;
3797	#[link_name = "llvm.x86.avx2.gather.d.ps.256"]
3798	fn vpgatherdps(
3799	src: __m256,
3800	slice: *const i8,
3801	offsets: i32x8,
3802	mask: __m256,
3803	scale: i8,
3804	) -> __m256;
3805	#[link_name = "llvm.x86.avx2.gather.q.ps"]
3806	fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8)
3807	-> __m128;
3808	#[link_name = "llvm.x86.avx2.gather.q.ps.256"]
3809	fn vpgatherqps(
3810	src: __m128,
3811	slice: *const i8,
3812	offsets: i64x4,
3813	mask: __m128,
3814	scale: i8,
3815	) -> __m128;
3816	#[link_name = "llvm.x86.avx2.psll.dq"]
3817	fn vpslldq(a: i64x4, b: i32) -> i64x4;
3818	#[link_name = "llvm.x86.avx2.psrl.dq"]
3819	fn vpsrldq(a: i64x4, b: i32) -> i64x4;
3820	}
3821
3822	#[cfg(test)]
3823	mod tests {
3824
3825	use stdarch_test::simd_test;
3826
3827	use crate::core_arch::x86::*;
3828
3829	#[simd_test(enable = "avx2")]
3830	unsafe fn test_mm256_abs_epi32() {
3831	#[rustfmt::skip]
3832	let a = _mm256_setr_epi32(
3833	`0`, `1`, `-1`, i32::MAX,
3834	i32::MIN, `100`, `-100`, `-32`,
3835	);
3836	let r = _mm256_abs_epi32(a);
3837	#[rustfmt::skip]
3838	let e = _mm256_setr_epi32(
3839	`0`, `1`, `1`, i32::MAX,
3840	i32::MAX.wrapping_add(`1`), `100`, `100`, `32`,
3841	);
3842	assert_eq_m256i(r, e);
3843	}
3844
3845	#[simd_test(enable = "avx2")]
3846	unsafe fn test_mm256_abs_epi16() {
3847	#[rustfmt::skip]
3848	let a = _mm256_setr_epi16(
3849	`0`, `1`, `-1`, `2`, `-2`, `3`, `-3`, `4`,
3850	`-4`, `5`, `-5`, i16::MAX, i16::MIN, `100`, `-100`, `-32`,
3851	);
3852	let r = _mm256_abs_epi16(a);
3853	#[rustfmt::skip]
3854	let e = _mm256_setr_epi16(
3855	`0`, `1`, `1`, `2`, `2`, `3`, `3`, `4`,
3856	`4`, `5`, `5`, i16::MAX, i16::MAX.wrapping_add(`1`), `100`, `100`, `32`,
3857	);
3858	assert_eq_m256i(r, e);
3859	}
3860
3861	#[simd_test(enable = "avx2")]
3862	unsafe fn test_mm256_abs_epi8() {
3863	#[rustfmt::skip]
3864	let a = _mm256_setr_epi8(
3865	`0`, `1`, `-1`, `2`, `-2`, `3`, `-3`, `4`,
3866	`-4`, `5`, `-5`, i8::MAX, i8::MIN, `100`, `-100`, `-32`,
3867	`0`, `1`, `-1`, `2`, `-2`, `3`, `-3`, `4`,
3868	`-4`, `5`, `-5`, i8::MAX, i8::MIN, `100`, `-100`, `-32`,
3869	);
3870	let r = _mm256_abs_epi8(a);
3871	#[rustfmt::skip]
3872	let e = _mm256_setr_epi8(
3873	`0`, `1`, `1`, `2`, `2`, `3`, `3`, `4`,
3874	`4`, `5`, `5`, i8::MAX, i8::MAX.wrapping_add(`1`), `100`, `100`, `32`,
3875	`0`, `1`, `1`, `2`, `2`, `3`, `3`, `4`,
3876	`4`, `5`, `5`, i8::MAX, i8::MAX.wrapping_add(`1`), `100`, `100`, `32`,
3877	);
3878	assert_eq_m256i(r, e);
3879	}
3880
3881	#[simd_test(enable = "avx2")]
3882	unsafe fn test_mm256_add_epi64() {
3883	let a = _mm256_setr_epi64x(`-10`, `0`, `100`, `1_000_000_000`);
3884	let b = _mm256_setr_epi64x(`-1`, `0`, `1`, `2`);
3885	let r = _mm256_add_epi64(a, b);
3886	let e = _mm256_setr_epi64x(`-11`, `0`, `101`, `1_000_000_002`);
3887	assert_eq_m256i(r, e);
3888	}
3889
3890	#[simd_test(enable = "avx2")]
3891	unsafe fn test_mm256_add_epi32() {
3892	let a = _mm256_setr_epi32(`-1`, `0`, `1`, `2`, `3`, `4`, `5`, `6`);
3893	let b = _mm256_setr_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
3894	let r = _mm256_add_epi32(a, b);
3895	let e = _mm256_setr_epi32(`0`, `2`, `4`, `6`, `8`, `10`, `12`, `14`);
3896	assert_eq_m256i(r, e);
3897	}
3898
3899	#[simd_test(enable = "avx2")]
3900	unsafe fn test_mm256_add_epi16() {
3901	#[rustfmt::skip]
3902	let a = _mm256_setr_epi16(
3903	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
3904	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
3905	);
3906	#[rustfmt::skip]
3907	let b = _mm256_setr_epi16(
3908	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
3909	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
3910	);
3911	let r = _mm256_add_epi16(a, b);
3912	#[rustfmt::skip]
3913	let e = _mm256_setr_epi16(
3914	`0`, `2`, `4`, `6`, `8`, `10`, `12`, `14`,
3915	`16`, `18`, `20`, `22`, `24`, `26`, `28`, `30`,
3916	);
3917	assert_eq_m256i(r, e);
3918	}
3919
3920	#[simd_test(enable = "avx2")]
3921	unsafe fn test_mm256_add_epi8() {
3922	#[rustfmt::skip]
3923	let a = _mm256_setr_epi8(
3924	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
3925	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
3926	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`,
3927	`24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
3928	);
3929	#[rustfmt::skip]
3930	let b = _mm256_setr_epi8(
3931	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
3932	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
3933	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`,
3934	`24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
3935	);
3936	let r = _mm256_add_epi8(a, b);
3937	#[rustfmt::skip]
3938	let e = _mm256_setr_epi8(
3939	`0`, `2`, `4`, `6`, `8`, `10`, `12`, `14`,
3940	`16`, `18`, `20`, `22`, `24`, `26`, `28`, `30`,
3941	`32`, `34`, `36`, `38`, `40`, `42`, `44`, `46`,
3942	`48`, `50`, `52`, `54`, `56`, `58`, `60`, `62`,
3943	);
3944	assert_eq_m256i(r, e);
3945	}
3946
3947	#[simd_test(enable = "avx2")]
3948	unsafe fn test_mm256_adds_epi8() {
3949	#[rustfmt::skip]
3950	let a = _mm256_setr_epi8(
3951	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
3952	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
3953	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`,
3954	`24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
3955	);
3956	#[rustfmt::skip]
3957	let b = _mm256_setr_epi8(
3958	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`,
3959	`40`, `41`, `42`, `43`, `44`, `45`, `46`, `47`,
3960	`48`, `49`, `50`, `51`, `52`, `53`, `54`, `55`,
3961	`56`, `57`, `58`, `59`, `60`, `61`, `62`, `63`,
3962	);
3963	let r = _mm256_adds_epi8(a, b);
3964	#[rustfmt::skip]
3965	let e = _mm256_setr_epi8(
3966	`32`, `34`, `36`, `38`, `40`, `42`, `44`, `46`,
3967	`48`, `50`, `52`, `54`, `56`, `58`, `60`, `62`,
3968	`64`, `66`, `68`, `70`, `72`, `74`, `76`, `78`,
3969	`80`, `82`, `84`, `86`, `88`, `90`, `92`, `94`,
3970	);
3971	assert_eq_m256i(r, e);
3972	}
3973
3974	#[simd_test(enable = "avx2")]
3975	unsafe fn test_mm256_adds_epi8_saturate_positive() {
3976	let a = _mm256_set1_epi8(`0x7F`);
3977	let b = _mm256_set1_epi8(`1`);
3978	let r = _mm256_adds_epi8(a, b);
3979	assert_eq_m256i(r, a);
3980	}
3981
3982	#[simd_test(enable = "avx2")]
3983	unsafe fn test_mm256_adds_epi8_saturate_negative() {
3984	let a = _mm256_set1_epi8(`-0x80`);
3985	let b = _mm256_set1_epi8(`-1`);
3986	let r = _mm256_adds_epi8(a, b);
3987	assert_eq_m256i(r, a);
3988	}
3989
3990	#[simd_test(enable = "avx2")]
3991	unsafe fn test_mm256_adds_epi16() {
3992	#[rustfmt::skip]
3993	let a = _mm256_setr_epi16(
3994	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
3995	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
3996	);
3997	#[rustfmt::skip]
3998	let b = _mm256_setr_epi16(
3999	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`,
4000	`40`, `41`, `42`, `43`, `44`, `45`, `46`, `47`,
4001	);
4002	let r = _mm256_adds_epi16(a, b);
4003	#[rustfmt::skip]
4004	let e = _mm256_setr_epi16(
4005	`32`, `34`, `36`, `38`, `40`, `42`, `44`, `46`,
4006	`48`, `50`, `52`, `54`, `56`, `58`, `60`, `62`,
4007	);
4008
4009	assert_eq_m256i(r, e);
4010	}
4011
4012	#[simd_test(enable = "avx2")]
4013	unsafe fn test_mm256_adds_epi16_saturate_positive() {
4014	let a = _mm256_set1_epi16(`0x7FFF`);
4015	let b = _mm256_set1_epi16(`1`);
4016	let r = _mm256_adds_epi16(a, b);
4017	assert_eq_m256i(r, a);
4018	}
4019
4020	#[simd_test(enable = "avx2")]
4021	unsafe fn test_mm256_adds_epi16_saturate_negative() {
4022	let a = _mm256_set1_epi16(`-0x8000`);
4023	let b = _mm256_set1_epi16(`-1`);
4024	let r = _mm256_adds_epi16(a, b);
4025	assert_eq_m256i(r, a);
4026	}
4027
4028	#[simd_test(enable = "avx2")]
4029	unsafe fn test_mm256_adds_epu8() {
4030	#[rustfmt::skip]
4031	let a = _mm256_setr_epi8(
4032	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
4033	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
4034	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`,
4035	`24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
4036	);
4037	#[rustfmt::skip]
4038	let b = _mm256_setr_epi8(
4039	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`,
4040	`40`, `41`, `42`, `43`, `44`, `45`, `46`, `47`,
4041	`48`, `49`, `50`, `51`, `52`, `53`, `54`, `55`,
4042	`56`, `57`, `58`, `59`, `60`, `61`, `62`, `63`,
4043	);
4044	let r = _mm256_adds_epu8(a, b);
4045	#[rustfmt::skip]
4046	let e = _mm256_setr_epi8(
4047	`32`, `34`, `36`, `38`, `40`, `42`, `44`, `46`,
4048	`48`, `50`, `52`, `54`, `56`, `58`, `60`, `62`,
4049	`64`, `66`, `68`, `70`, `72`, `74`, `76`, `78`,
4050	`80`, `82`, `84`, `86`, `88`, `90`, `92`, `94`,
4051	);
4052	assert_eq_m256i(r, e);
4053	}
4054
4055	#[simd_test(enable = "avx2")]
4056	unsafe fn test_mm256_adds_epu8_saturate() {
4057	let a = _mm256_set1_epi8(!`0`);
4058	let b = _mm256_set1_epi8(`1`);
4059	let r = _mm256_adds_epu8(a, b);
4060	assert_eq_m256i(r, a);
4061	}
4062
4063	#[simd_test(enable = "avx2")]
4064	unsafe fn test_mm256_adds_epu16() {
4065	#[rustfmt::skip]
4066	let a = _mm256_setr_epi16(
4067	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
4068	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
4069	);
4070	#[rustfmt::skip]
4071	let b = _mm256_setr_epi16(
4072	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`,
4073	`40`, `41`, `42`, `43`, `44`, `45`, `46`, `47`,
4074	);
4075	let r = _mm256_adds_epu16(a, b);
4076	#[rustfmt::skip]
4077	let e = _mm256_setr_epi16(
4078	`32`, `34`, `36`, `38`, `40`, `42`, `44`, `46`,
4079	`48`, `50`, `52`, `54`, `56`, `58`, `60`, `62`,
4080	);
4081
4082	assert_eq_m256i(r, e);
4083	}
4084
4085	#[simd_test(enable = "avx2")]
4086	unsafe fn test_mm256_adds_epu16_saturate() {
4087	let a = _mm256_set1_epi16(!`0`);
4088	let b = _mm256_set1_epi16(`1`);
4089	let r = _mm256_adds_epu16(a, b);
4090	assert_eq_m256i(r, a);
4091	}
4092
4093	#[simd_test(enable = "avx2")]
4094	unsafe fn test_mm256_and_si256() {
4095	let a = _mm256_set1_epi8(`5`);
4096	let b = _mm256_set1_epi8(`3`);
4097	let got = _mm256_and_si256(a, b);
4098	assert_eq_m256i(got, _mm256_set1_epi8(`1`));
4099	}
4100
4101	#[simd_test(enable = "avx2")]
4102	unsafe fn test_mm256_andnot_si256() {
4103	let a = _mm256_set1_epi8(`5`);
4104	let b = _mm256_set1_epi8(`3`);
4105	let got = _mm256_andnot_si256(a, b);
4106	assert_eq_m256i(got, _mm256_set1_epi8(`2`));
4107	}
4108
4109	#[simd_test(enable = "avx2")]
4110	unsafe fn test_mm256_avg_epu8() {
4111	let (a, b) = (_mm256_set1_epi8(`3`), _mm256_set1_epi8(`9`));
4112	let r = _mm256_avg_epu8(a, b);
4113	assert_eq_m256i(r, _mm256_set1_epi8(`6`));
4114	}
4115
4116	#[simd_test(enable = "avx2")]
4117	unsafe fn test_mm256_avg_epu16() {
4118	let (a, b) = (_mm256_set1_epi16(`3`), _mm256_set1_epi16(`9`));
4119	let r = _mm256_avg_epu16(a, b);
4120	assert_eq_m256i(r, _mm256_set1_epi16(`6`));
4121	}
4122
4123	#[simd_test(enable = "avx2")]
4124	unsafe fn test_mm_blend_epi32() {
4125	let (a, b) = (_mm_set1_epi32(`3`), _mm_set1_epi32(`9`));
4126	let e = _mm_setr_epi32(`9`, `3`, `3`, `3`);
4127	let r = _mm_blend_epi32::<`0x01`>(a, b);
4128	assert_eq_m128i(r, e);
4129
4130	let r = _mm_blend_epi32::<`0x0E`>(b, a);
4131	assert_eq_m128i(r, e);
4132	}
4133
4134	#[simd_test(enable = "avx2")]
4135	unsafe fn test_mm256_blend_epi32() {
4136	let (a, b) = (_mm256_set1_epi32(`3`), _mm256_set1_epi32(`9`));
4137	let e = _mm256_setr_epi32(`9`, `3`, `3`, `3`, `3`, `3`, `3`, `3`);
4138	let r = _mm256_blend_epi32::<`0x01`>(a, b);
4139	assert_eq_m256i(r, e);
4140
4141	let e = _mm256_setr_epi32(`3`, `9`, `3`, `3`, `3`, `3`, `3`, `9`);
4142	let r = _mm256_blend_epi32::<`0x82`>(a, b);
4143	assert_eq_m256i(r, e);
4144
4145	let e = _mm256_setr_epi32(`3`, `3`, `9`, `9`, `9`, `9`, `9`, `3`);
4146	let r = _mm256_blend_epi32::<`0x7C`>(a, b);
4147	assert_eq_m256i(r, e);
4148	}
4149
4150	#[simd_test(enable = "avx2")]
4151	unsafe fn test_mm256_blend_epi16() {
4152	let (a, b) = (_mm256_set1_epi16(`3`), _mm256_set1_epi16(`9`));
4153	let e = _mm256_setr_epi16(`9`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `9`, `3`, `3`, `3`, `3`, `3`, `3`, `3`);
4154	let r = _mm256_blend_epi16::<`0x01`>(a, b);
4155	assert_eq_m256i(r, e);
4156
4157	let r = _mm256_blend_epi16::<`0xFE`>(b, a);
4158	assert_eq_m256i(r, e);
4159	}
4160
4161	#[simd_test(enable = "avx2")]
4162	unsafe fn test_mm256_blendv_epi8() {
4163	let (a, b) = (_mm256_set1_epi8(`4`), _mm256_set1_epi8(`2`));
4164	let mask = _mm256_insert_epi8::<`2`>(_mm256_set1_epi8(`0`), `-1`);
4165	let e = _mm256_insert_epi8::<`2`>(_mm256_set1_epi8(`4`), `2`);
4166	let r = _mm256_blendv_epi8(a, b, mask);
4167	assert_eq_m256i(r, e);
4168	}
4169
4170	#[simd_test(enable = "avx2")]
4171	unsafe fn test_mm_broadcastb_epi8() {
4172	let a = _mm_insert_epi8::<`0`>(_mm_set1_epi8(`0x00`), `0x2a`);
4173	let res = _mm_broadcastb_epi8(a);
4174	assert_eq_m128i(res, _mm_set1_epi8(`0x2a`));
4175	}
4176
4177	#[simd_test(enable = "avx2")]
4178	unsafe fn test_mm256_broadcastb_epi8() {
4179	let a = _mm_insert_epi8::<`0`>(_mm_set1_epi8(`0x00`), `0x2a`);
4180	let res = _mm256_broadcastb_epi8(a);
4181	assert_eq_m256i(res, _mm256_set1_epi8(`0x2a`));
4182	}
4183
4184	#[simd_test(enable = "avx2")]
4185	unsafe fn test_mm_broadcastd_epi32() {
4186	let a = _mm_setr_epi32(`0x2a`, `0x8000000`, `0`, `0`);
4187	let res = _mm_broadcastd_epi32(a);
4188	assert_eq_m128i(res, _mm_set1_epi32(`0x2a`));
4189	}
4190
4191	#[simd_test(enable = "avx2")]
4192	unsafe fn test_mm256_broadcastd_epi32() {
4193	let a = _mm_setr_epi32(`0x2a`, `0x8000000`, `0`, `0`);
4194	let res = _mm256_broadcastd_epi32(a);
4195	assert_eq_m256i(res, _mm256_set1_epi32(`0x2a`));
4196	}
4197
4198	#[simd_test(enable = "avx2")]
4199	unsafe fn test_mm_broadcastq_epi64() {
4200	let a = _mm_setr_epi64x(`0x1ffffffff`, `0`);
4201	let res = _mm_broadcastq_epi64(a);
4202	assert_eq_m128i(res, _mm_set1_epi64x(`0x1ffffffff`));
4203	}
4204
4205	#[simd_test(enable = "avx2")]
4206	unsafe fn test_mm256_broadcastq_epi64() {
4207	let a = _mm_setr_epi64x(`0x1ffffffff`, `0`);
4208	let res = _mm256_broadcastq_epi64(a);
4209	assert_eq_m256i(res, _mm256_set1_epi64x(`0x1ffffffff`));
4210	}
4211
4212	#[simd_test(enable = "avx2")]
4213	unsafe fn test_mm_broadcastsd_pd() {
4214	let a = _mm_setr_pd(`6.28`, `3.14`);
4215	let res = _mm_broadcastsd_pd(a);
4216	assert_eq_m128d(res, _mm_set1_pd(`6.28f64`));
4217	}
4218
4219	#[simd_test(enable = "avx2")]
4220	unsafe fn test_mm256_broadcastsd_pd() {
4221	let a = _mm_setr_pd(`6.28`, `3.14`);
4222	let res = _mm256_broadcastsd_pd(a);
4223	assert_eq_m256d(res, _mm256_set1_pd(`6.28f64`));
4224	}
4225
4226	#[simd_test(enable = "avx2")]
4227	unsafe fn test_mm256_broadcastsi128_si256() {
4228	let a = _mm_setr_epi64x(`0x0987654321012334`, `0x5678909876543210`);
4229	let res = _mm256_broadcastsi128_si256(a);
4230	let retval = _mm256_setr_epi64x(
4231	`0x0987654321012334`,
4232	`0x5678909876543210`,
4233	`0x0987654321012334`,
4234	`0x5678909876543210`,
4235	);
4236	assert_eq_m256i(res, retval);
4237	}
4238
4239	#[simd_test(enable = "avx2")]
4240	unsafe fn test_mm_broadcastss_ps() {
4241	let a = _mm_setr_ps(`6.28`, `3.14`, `0.0`, `0.0`);
4242	let res = _mm_broadcastss_ps(a);
4243	assert_eq_m128(res, _mm_set1_ps(`6.28f32`));
4244	}
4245
4246	#[simd_test(enable = "avx2")]
4247	unsafe fn test_mm256_broadcastss_ps() {
4248	let a = _mm_setr_ps(`6.28`, `3.14`, `0.0`, `0.0`);
4249	let res = _mm256_broadcastss_ps(a);
4250	assert_eq_m256(res, _mm256_set1_ps(`6.28f32`));
4251	}
4252
4253	#[simd_test(enable = "avx2")]
4254	unsafe fn test_mm_broadcastw_epi16() {
4255	let a = _mm_insert_epi16::<`0`>(_mm_set1_epi16(`0x2a`), `0x22b`);
4256	let res = _mm_broadcastw_epi16(a);
4257	assert_eq_m128i(res, _mm_set1_epi16(`0x22b`));
4258	}
4259
4260	#[simd_test(enable = "avx2")]
4261	unsafe fn test_mm256_broadcastw_epi16() {
4262	let a = _mm_insert_epi16::<`0`>(_mm_set1_epi16(`0x2a`), `0x22b`);
4263	let res = _mm256_broadcastw_epi16(a);
4264	assert_eq_m256i(res, _mm256_set1_epi16(`0x22b`));
4265	}
4266
4267	#[simd_test(enable = "avx2")]
4268	unsafe fn test_mm256_cmpeq_epi8() {
4269	#[rustfmt::skip]
4270	let a = _mm256_setr_epi8(
4271	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
4272	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
4273	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`,
4274	`24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
4275	);
4276	#[rustfmt::skip]
4277	let b = _mm256_setr_epi8(
4278	`31`, `30`, `2`, `28`, `27`, `26`, `25`, `24`,
4279	`23`, `22`, `21`, `20`, `19`, `18`, `17`, `16`,
4280	`15`, `14`, `13`, `12`, `11`, `10`, `9`, `8`,
4281	`7`, `6`, `5`, `4`, `3`, `2`, `1`, `0`,
4282	);
4283	let r = _mm256_cmpeq_epi8(a, b);
4284	assert_eq_m256i(r, _mm256_insert_epi8::<`2`>(_mm256_set1_epi8(`0`), !`0`));
4285	}
4286
4287	#[simd_test(enable = "avx2")]
4288	unsafe fn test_mm256_cmpeq_epi16() {
4289	#[rustfmt::skip]
4290	let a = _mm256_setr_epi16(
4291	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
4292	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
4293	);
4294	#[rustfmt::skip]
4295	let b = _mm256_setr_epi16(
4296	`15`, `14`, `2`, `12`, `11`, `10`, `9`, `8`,
4297	`7`, `6`, `5`, `4`, `3`, `2`, `1`, `0`,
4298	);
4299	let r = _mm256_cmpeq_epi16(a, b);
4300	assert_eq_m256i(r, _mm256_insert_epi16::<`2`>(_mm256_set1_epi16(`0`), !`0`));
4301	}
4302
4303	#[simd_test(enable = "avx2")]
4304	unsafe fn test_mm256_cmpeq_epi32() {
4305	let a = _mm256_setr_epi32(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`);
4306	let b = _mm256_setr_epi32(`7`, `6`, `2`, `4`, `3`, `2`, `1`, `0`);
4307	let r = _mm256_cmpeq_epi32(a, b);
4308	let e = _mm256_set1_epi32(`0`);
4309	let e = _mm256_insert_epi32::<`2`>(e, !`0`);
4310	assert_eq_m256i(r, e);
4311	}
4312
4313	#[simd_test(enable = "avx2")]
4314	unsafe fn test_mm256_cmpeq_epi64() {
4315	let a = _mm256_setr_epi64x(`0`, `1`, `2`, `3`);
4316	let b = _mm256_setr_epi64x(`3`, `2`, `2`, `0`);
4317	let r = _mm256_cmpeq_epi64(a, b);
4318	assert_eq_m256i(r, _mm256_insert_epi64::<`2`>(_mm256_set1_epi64x(`0`), !`0`));
4319	}
4320
4321	#[simd_test(enable = "avx2")]
4322	unsafe fn test_mm256_cmpgt_epi8() {
4323	let a = _mm256_insert_epi8::<`0`>(_mm256_set1_epi8(`0`), `5`);
4324	let b = _mm256_set1_epi8(`0`);
4325	let r = _mm256_cmpgt_epi8(a, b);
4326	assert_eq_m256i(r, _mm256_insert_epi8::<`0`>(_mm256_set1_epi8(`0`), !`0`));
4327	}
4328
4329	#[simd_test(enable = "avx2")]
4330	unsafe fn test_mm256_cmpgt_epi16() {
4331	let a = _mm256_insert_epi16::<`0`>(_mm256_set1_epi16(`0`), `5`);
4332	let b = _mm256_set1_epi16(`0`);
4333	let r = _mm256_cmpgt_epi16(a, b);
4334	assert_eq_m256i(r, _mm256_insert_epi16::<`0`>(_mm256_set1_epi16(`0`), !`0`));
4335	}
4336
4337	#[simd_test(enable = "avx2")]
4338	unsafe fn test_mm256_cmpgt_epi32() {
4339	let a = _mm256_insert_epi32::<`0`>(_mm256_set1_epi32(`0`), `5`);
4340	let b = _mm256_set1_epi32(`0`);
4341	let r = _mm256_cmpgt_epi32(a, b);
4342	assert_eq_m256i(r, _mm256_insert_epi32::<`0`>(_mm256_set1_epi32(`0`), !`0`));
4343	}
4344
4345	#[simd_test(enable = "avx2")]
4346	unsafe fn test_mm256_cmpgt_epi64() {
4347	let a = _mm256_insert_epi64::<`0`>(_mm256_set1_epi64x(`0`), `5`);
4348	let b = _mm256_set1_epi64x(`0`);
4349	let r = _mm256_cmpgt_epi64(a, b);
4350	assert_eq_m256i(r, _mm256_insert_epi64::<`0`>(_mm256_set1_epi64x(`0`), !`0`));
4351	}
4352
4353	#[simd_test(enable = "avx2")]
4354	unsafe fn test_mm256_cvtepi8_epi16() {
4355	#[rustfmt::skip]
4356	let a = _mm_setr_epi8(
4357	`0`, `0`, `-1`, `1`, `-2`, `2`, `-3`, `3`,
4358	`-4`, `4`, `-5`, `5`, `-6`, `6`, `-7`, `7`,
4359	);
4360	#[rustfmt::skip]
4361	let r = _mm256_setr_epi16(
4362	`0`, `0`, `-1`, `1`, `-2`, `2`, `-3`, `3`,
4363	`-4`, `4`, `-5`, `5`, `-6`, `6`, `-7`, `7`,
4364	);
4365	assert_eq_m256i(r, _mm256_cvtepi8_epi16(a));
4366	}
4367
4368	#[simd_test(enable = "avx2")]
4369	unsafe fn test_mm256_cvtepi8_epi32() {
4370	#[rustfmt::skip]
4371	let a = _mm_setr_epi8(
4372	`0`, `0`, `-1`, `1`, `-2`, `2`, `-3`, `3`,
4373	`-4`, `4`, `-5`, `5`, `-6`, `6`, `-7`, `7`,
4374	);
4375	let r = _mm256_setr_epi32(`0`, `0`, `-1`, `1`, `-2`, `2`, `-3`, `3`);
4376	assert_eq_m256i(r, _mm256_cvtepi8_epi32(a));
4377	}
4378
4379	#[simd_test(enable = "avx2")]
4380	unsafe fn test_mm256_cvtepi8_epi64() {
4381	#[rustfmt::skip]
4382	let a = _mm_setr_epi8(
4383	`0`, `0`, `-1`, `1`, `-2`, `2`, `-3`, `3`,
4384	`-4`, `4`, `-5`, `5`, `-6`, `6`, `-7`, `7`,
4385	);
4386	let r = _mm256_setr_epi64x(`0`, `0`, `-1`, `1`);
4387	assert_eq_m256i(r, _mm256_cvtepi8_epi64(a));
4388	}
4389
4390	#[simd_test(enable = "avx2")]
4391	unsafe fn test_mm256_cvtepi16_epi32() {
4392	let a = _mm_setr_epi16(`0`, `0`, `-1`, `1`, `-2`, `2`, `-3`, `3`);
4393	let r = _mm256_setr_epi32(`0`, `0`, `-1`, `1`, `-2`, `2`, `-3`, `3`);
4394	assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
4395	}
4396
4397	#[simd_test(enable = "avx2")]
4398	unsafe fn test_mm256_cvtepi16_epi64() {
4399	let a = _mm_setr_epi16(`0`, `0`, `-1`, `1`, `-2`, `2`, `-3`, `3`);
4400	let r = _mm256_setr_epi64x(`0`, `0`, `-1`, `1`);
4401	assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
4402	}
4403
4404	#[simd_test(enable = "avx2")]
4405	unsafe fn test_mm256_cvtepi32_epi64() {
4406	let a = _mm_setr_epi32(`0`, `0`, `-1`, `1`);
4407	let r = _mm256_setr_epi64x(`0`, `0`, `-1`, `1`);
4408	assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
4409	}
4410
4411	#[simd_test(enable = "avx2")]
4412	unsafe fn test_mm256_cvtepu16_epi32() {
4413	let a = _mm_setr_epi16(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`);
4414	let r = _mm256_setr_epi32(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`);
4415	assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
4416	}
4417
4418	#[simd_test(enable = "avx2")]
4419	unsafe fn test_mm256_cvtepu16_epi64() {
4420	let a = _mm_setr_epi16(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`);
4421	let r = _mm256_setr_epi64x(`0`, `1`, `2`, `3`);
4422	assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
4423	}
4424
4425	#[simd_test(enable = "avx2")]
4426	unsafe fn test_mm256_cvtepu32_epi64() {
4427	let a = _mm_setr_epi32(`0`, `1`, `2`, `3`);
4428	let r = _mm256_setr_epi64x(`0`, `1`, `2`, `3`);
4429	assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
4430	}
4431
4432	#[simd_test(enable = "avx2")]
4433	unsafe fn test_mm256_cvtepu8_epi16() {
4434	#[rustfmt::skip]
4435	let a = _mm_setr_epi8(
4436	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
4437	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
4438	);
4439	#[rustfmt::skip]
4440	let r = _mm256_setr_epi16(
4441	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
4442	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
4443	);
4444	assert_eq_m256i(r, _mm256_cvtepu8_epi16(a));
4445	}
4446
4447	#[simd_test(enable = "avx2")]
4448	unsafe fn test_mm256_cvtepu8_epi32() {
4449	#[rustfmt::skip]
4450	let a = _mm_setr_epi8(
4451	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
4452	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
4453	);
4454	let r = _mm256_setr_epi32(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`);
4455	assert_eq_m256i(r, _mm256_cvtepu8_epi32(a));
4456	}
4457
4458	#[simd_test(enable = "avx2")]
4459	unsafe fn test_mm256_cvtepu8_epi64() {
4460	#[rustfmt::skip]
4461	let a = _mm_setr_epi8(
4462	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
4463	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
4464	);
4465	let r = _mm256_setr_epi64x(`0`, `1`, `2`, `3`);
4466	assert_eq_m256i(r, _mm256_cvtepu8_epi64(a));
4467	}
4468
4469	#[simd_test(enable = "avx2")]
4470	unsafe fn test_mm256_extracti128_si256() {
4471	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
4472	let r = _mm256_extracti128_si256::<`1`>(a);
4473	let e = _mm_setr_epi64x(`3`, `4`);
4474	assert_eq_m128i(r, e);
4475	}
4476
4477	#[simd_test(enable = "avx2")]
4478	unsafe fn test_mm256_hadd_epi16() {
4479	let a = _mm256_set1_epi16(`2`);
4480	let b = _mm256_set1_epi16(`4`);
4481	let r = _mm256_hadd_epi16(a, b);
4482	let e = _mm256_setr_epi16(`4`, `4`, `4`, `4`, `8`, `8`, `8`, `8`, `4`, `4`, `4`, `4`, `8`, `8`, `8`, `8`);
4483	assert_eq_m256i(r, e);
4484	}
4485
4486	#[simd_test(enable = "avx2")]
4487	unsafe fn test_mm256_hadd_epi32() {
4488	let a = _mm256_set1_epi32(`2`);
4489	let b = _mm256_set1_epi32(`4`);
4490	let r = _mm256_hadd_epi32(a, b);
4491	let e = _mm256_setr_epi32(`4`, `4`, `8`, `8`, `4`, `4`, `8`, `8`);
4492	assert_eq_m256i(r, e);
4493	}
4494
4495	#[simd_test(enable = "avx2")]
4496	unsafe fn test_mm256_hadds_epi16() {
4497	let a = _mm256_set1_epi16(`2`);
4498	let a = _mm256_insert_epi16::<`0`>(a, `0x7fff`);
4499	let a = _mm256_insert_epi16::<`1`>(a, `1`);
4500	let b = _mm256_set1_epi16(`4`);
4501	let r = _mm256_hadds_epi16(a, b);
4502	#[rustfmt::skip]
4503	let e = _mm256_setr_epi16(
4504	`0x7FFF`, `4`, `4`, `4`, `8`, `8`, `8`, `8`,
4505	`4`, `4`, `4`, `4`, `8`, `8`, `8`, `8`,
4506	);
4507	assert_eq_m256i(r, e);
4508	}
4509
4510	#[simd_test(enable = "avx2")]
4511	unsafe fn test_mm256_hsub_epi16() {
4512	let a = _mm256_set1_epi16(`2`);
4513	let b = _mm256_set1_epi16(`4`);
4514	let r = _mm256_hsub_epi16(a, b);
4515	let e = _mm256_set1_epi16(`0`);
4516	assert_eq_m256i(r, e);
4517	}
4518
4519	#[simd_test(enable = "avx2")]
4520	unsafe fn test_mm256_hsub_epi32() {
4521	let a = _mm256_set1_epi32(`2`);
4522	let b = _mm256_set1_epi32(`4`);
4523	let r = _mm256_hsub_epi32(a, b);
4524	let e = _mm256_set1_epi32(`0`);
4525	assert_eq_m256i(r, e);
4526	}
4527
4528	#[simd_test(enable = "avx2")]
4529	unsafe fn test_mm256_hsubs_epi16() {
4530	let a = _mm256_set1_epi16(`2`);
4531	let a = _mm256_insert_epi16::<`0`>(a, `0x7fff`);
4532	let a = _mm256_insert_epi16::<`1`>(a, `-1`);
4533	let b = _mm256_set1_epi16(`4`);
4534	let r = _mm256_hsubs_epi16(a, b);
4535	let e = _mm256_insert_epi16::<`0`>(_mm256_set1_epi16(`0`), `0x7FFF`);
4536	assert_eq_m256i(r, e);
4537	}
4538
4539	#[simd_test(enable = "avx2")]
4540	unsafe fn test_mm256_madd_epi16() {
4541	let a = _mm256_set1_epi16(`2`);
4542	let b = _mm256_set1_epi16(`4`);
4543	let r = _mm256_madd_epi16(a, b);
4544	let e = _mm256_set1_epi32(`16`);
4545	assert_eq_m256i(r, e);
4546	}
4547
4548	#[simd_test(enable = "avx2")]
4549	unsafe fn test_mm256_inserti128_si256() {
4550	let a = _mm256_setr_epi64x(`1`, `2`, `3`, `4`);
4551	let b = _mm_setr_epi64x(`7`, `8`);
4552	let r = _mm256_inserti128_si256::<`1`>(a, b);
4553	let e = _mm256_setr_epi64x(`1`, `2`, `7`, `8`);
4554	assert_eq_m256i(r, e);
4555	}
4556
4557	#[simd_test(enable = "avx2")]
4558	unsafe fn test_mm256_maddubs_epi16() {
4559	let a = _mm256_set1_epi8(`2`);
4560	let b = _mm256_set1_epi8(`4`);
4561	let r = _mm256_maddubs_epi16(a, b);
4562	let e = _mm256_set1_epi16(`16`);
4563	assert_eq_m256i(r, e);
4564	}
4565
4566	#[simd_test(enable = "avx2")]
4567	unsafe fn test_mm_maskload_epi32() {
4568	let nums = [`1`, `2`, `3`, `4`];
4569	let a = &nums as *const i32;
4570	let mask = _mm_setr_epi32(`-1`, `0`, `0`, `-1`);
4571	let r = _mm_maskload_epi32(a, mask);
4572	let e = _mm_setr_epi32(`1`, `0`, `0`, `4`);
4573	assert_eq_m128i(r, e);
4574	}
4575
4576	#[simd_test(enable = "avx2")]
4577	unsafe fn test_mm256_maskload_epi32() {
4578	let nums = [`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`];
4579	let a = &nums as *const i32;
4580	let mask = _mm256_setr_epi32(`-1`, `0`, `0`, `-1`, `0`, `-1`, `-1`, `0`);
4581	let r = _mm256_maskload_epi32(a, mask);
4582	let e = _mm256_setr_epi32(`1`, `0`, `0`, `4`, `0`, `6`, `7`, `0`);
4583	assert_eq_m256i(r, e);
4584	}
4585
4586	#[simd_test(enable = "avx2")]
4587	unsafe fn test_mm_maskload_epi64() {
4588	let nums = [`1_i64`, `2_i64`];
4589	let a = &nums as *const i64;
4590	let mask = _mm_setr_epi64x(`0`, `-1`);
4591	let r = _mm_maskload_epi64(a, mask);
4592	let e = _mm_setr_epi64x(`0`, `2`);
4593	assert_eq_m128i(r, e);
4594	}
4595
4596	#[simd_test(enable = "avx2")]
4597	unsafe fn test_mm256_maskload_epi64() {
4598	let nums = [`1_i64`, `2_i64`, `3_i64`, `4_i64`];
4599	let a = &nums as *const i64;
4600	let mask = _mm256_setr_epi64x(`0`, `-1`, `-1`, `0`);
4601	let r = _mm256_maskload_epi64(a, mask);
4602	let e = _mm256_setr_epi64x(`0`, `2`, `3`, `0`);
4603	assert_eq_m256i(r, e);
4604	}
4605
4606	#[simd_test(enable = "avx2")]
4607	unsafe fn test_mm_maskstore_epi32() {
4608	let a = _mm_setr_epi32(`1`, `2`, `3`, `4`);
4609	let mut arr = [`-1`, `-1`, `-1`, `-1`];
4610	let mask = _mm_setr_epi32(`-1`, `0`, `0`, `-1`);
4611	_mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4612	let e = [`1`, `-1`, `-1`, `4`];
4613	assert_eq!(arr, e);
4614	}
4615
4616	#[simd_test(enable = "avx2")]
4617	unsafe fn test_mm256_maskstore_epi32() {
4618	let a = _mm256_setr_epi32(`1`, `0x6d726f`, `3`, `42`, `0x777161`, `6`, `7`, `8`);
4619	let mut arr = [`-1`, `-1`, `-1`, `0x776173`, `-1`, `0x68657265`, `-1`, `-1`];
4620	let mask = _mm256_setr_epi32(`-1`, `0`, `0`, `-1`, `0`, `-1`, `-1`, `0`);
4621	_mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4622	let e = [`1`, `-1`, `-1`, `42`, `-1`, `6`, `7`, `-1`];
4623	assert_eq!(arr, e);
4624	}
4625
4626	#[simd_test(enable = "avx2")]
4627	unsafe fn test_mm_maskstore_epi64() {
4628	let a = _mm_setr_epi64x(`1_i64`, `2_i64`);
4629	let mut arr = [`-1_i64`, `-1_i64`];
4630	let mask = _mm_setr_epi64x(`0`, `-1`);
4631	_mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4632	let e = [`-1`, `2`];
4633	assert_eq!(arr, e);
4634	}
4635
4636	#[simd_test(enable = "avx2")]
4637	unsafe fn test_mm256_maskstore_epi64() {
4638	let a = _mm256_setr_epi64x(`1_i64`, `2_i64`, `3_i64`, `4_i64`);
4639	let mut arr = [`-1_i64`, `-1_i64`, `-1_i64`, `-1_i64`];
4640	let mask = _mm256_setr_epi64x(`0`, `-1`, `-1`, `0`);
4641	_mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4642	let e = [`-1`, `2`, `3`, `-1`];
4643	assert_eq!(arr, e);
4644	}
4645
4646	#[simd_test(enable = "avx2")]
4647	unsafe fn test_mm256_max_epi16() {
4648	let a = _mm256_set1_epi16(`2`);
4649	let b = _mm256_set1_epi16(`4`);
4650	let r = _mm256_max_epi16(a, b);
4651	assert_eq_m256i(r, b);
4652	}
4653
4654	#[simd_test(enable = "avx2")]
4655	unsafe fn test_mm256_max_epi32() {
4656	let a = _mm256_set1_epi32(`2`);
4657	let b = _mm256_set1_epi32(`4`);
4658	let r = _mm256_max_epi32(a, b);
4659	assert_eq_m256i(r, b);
4660	}
4661
4662	#[simd_test(enable = "avx2")]
4663	unsafe fn test_mm256_max_epi8() {
4664	let a = _mm256_set1_epi8(`2`);
4665	let b = _mm256_set1_epi8(`4`);
4666	let r = _mm256_max_epi8(a, b);
4667	assert_eq_m256i(r, b);
4668	}
4669
4670	#[simd_test(enable = "avx2")]
4671	unsafe fn test_mm256_max_epu16() {
4672	let a = _mm256_set1_epi16(`2`);
4673	let b = _mm256_set1_epi16(`4`);
4674	let r = _mm256_max_epu16(a, b);
4675	assert_eq_m256i(r, b);
4676	}
4677
4678	#[simd_test(enable = "avx2")]
4679	unsafe fn test_mm256_max_epu32() {
4680	let a = _mm256_set1_epi32(`2`);
4681	let b = _mm256_set1_epi32(`4`);
4682	let r = _mm256_max_epu32(a, b);
4683	assert_eq_m256i(r, b);
4684	}
4685
4686	#[simd_test(enable = "avx2")]
4687	unsafe fn test_mm256_max_epu8() {
4688	let a = _mm256_set1_epi8(`2`);
4689	let b = _mm256_set1_epi8(`4`);
4690	let r = _mm256_max_epu8(a, b);
4691	assert_eq_m256i(r, b);
4692	}
4693
4694	#[simd_test(enable = "avx2")]
4695	unsafe fn test_mm256_min_epi16() {
4696	let a = _mm256_set1_epi16(`2`);
4697	let b = _mm256_set1_epi16(`4`);
4698	let r = _mm256_min_epi16(a, b);
4699	assert_eq_m256i(r, a);
4700	}
4701
4702	#[simd_test(enable = "avx2")]
4703	unsafe fn test_mm256_min_epi32() {
4704	let a = _mm256_set1_epi32(`2`);
4705	let b = _mm256_set1_epi32(`4`);
4706	let r = _mm256_min_epi32(a, b);
4707	assert_eq_m256i(r, a);
4708	}
4709
4710	#[simd_test(enable = "avx2")]
4711	unsafe fn test_mm256_min_epi8() {
4712	let a = _mm256_set1_epi8(`2`);
4713	let b = _mm256_set1_epi8(`4`);
4714	let r = _mm256_min_epi8(a, b);
4715	assert_eq_m256i(r, a);
4716	}
4717
4718	#[simd_test(enable = "avx2")]
4719	unsafe fn test_mm256_min_epu16() {
4720	let a = _mm256_set1_epi16(`2`);
4721	let b = _mm256_set1_epi16(`4`);
4722	let r = _mm256_min_epu16(a, b);
4723	assert_eq_m256i(r, a);
4724	}
4725
4726	#[simd_test(enable = "avx2")]
4727	unsafe fn test_mm256_min_epu32() {
4728	let a = _mm256_set1_epi32(`2`);
4729	let b = _mm256_set1_epi32(`4`);
4730	let r = _mm256_min_epu32(a, b);
4731	assert_eq_m256i(r, a);
4732	}
4733
4734	#[simd_test(enable = "avx2")]
4735	unsafe fn test_mm256_min_epu8() {
4736	let a = _mm256_set1_epi8(`2`);
4737	let b = _mm256_set1_epi8(`4`);
4738	let r = _mm256_min_epu8(a, b);
4739	assert_eq_m256i(r, a);
4740	}
4741
4742	#[simd_test(enable = "avx2")]
4743	unsafe fn test_mm256_movemask_epi8() {
4744	let a = _mm256_set1_epi8(`-1`);
4745	let r = _mm256_movemask_epi8(a);
4746	let e = `-1`;
4747	assert_eq!(r, e);
4748	}
4749
4750	#[simd_test(enable = "avx2")]
4751	unsafe fn test_mm256_mpsadbw_epu8() {
4752	let a = _mm256_set1_epi8(`2`);
4753	let b = _mm256_set1_epi8(`4`);
4754	let r = _mm256_mpsadbw_epu8::<`0`>(a, b);
4755	let e = _mm256_set1_epi16(`8`);
4756	assert_eq_m256i(r, e);
4757	}
4758
4759	#[simd_test(enable = "avx2")]
4760	unsafe fn test_mm256_mul_epi32() {
4761	let a = _mm256_setr_epi32(`0`, `0`, `0`, `0`, `2`, `2`, `2`, `2`);
4762	let b = _mm256_setr_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
4763	let r = _mm256_mul_epi32(a, b);
4764	let e = _mm256_setr_epi64x(`0`, `0`, `10`, `14`);
4765	assert_eq_m256i(r, e);
4766	}
4767
4768	#[simd_test(enable = "avx2")]
4769	unsafe fn test_mm256_mul_epu32() {
4770	let a = _mm256_setr_epi32(`0`, `0`, `0`, `0`, `2`, `2`, `2`, `2`);
4771	let b = _mm256_setr_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
4772	let r = _mm256_mul_epu32(a, b);
4773	let e = _mm256_setr_epi64x(`0`, `0`, `10`, `14`);
4774	assert_eq_m256i(r, e);
4775	}
4776
4777	#[simd_test(enable = "avx2")]
4778	unsafe fn test_mm256_mulhi_epi16() {
4779	let a = _mm256_set1_epi16(`6535`);
4780	let b = _mm256_set1_epi16(`6535`);
4781	let r = _mm256_mulhi_epi16(a, b);
4782	let e = _mm256_set1_epi16(`651`);
4783	assert_eq_m256i(r, e);
4784	}
4785
4786	#[simd_test(enable = "avx2")]
4787	unsafe fn test_mm256_mulhi_epu16() {
4788	let a = _mm256_set1_epi16(`6535`);
4789	let b = _mm256_set1_epi16(`6535`);
4790	let r = _mm256_mulhi_epu16(a, b);
4791	let e = _mm256_set1_epi16(`651`);
4792	assert_eq_m256i(r, e);
4793	}
4794
4795	#[simd_test(enable = "avx2")]
4796	unsafe fn test_mm256_mullo_epi16() {
4797	let a = _mm256_set1_epi16(`2`);
4798	let b = _mm256_set1_epi16(`4`);
4799	let r = _mm256_mullo_epi16(a, b);
4800	let e = _mm256_set1_epi16(`8`);
4801	assert_eq_m256i(r, e);
4802	}
4803
4804	#[simd_test(enable = "avx2")]
4805	unsafe fn test_mm256_mullo_epi32() {
4806	let a = _mm256_set1_epi32(`2`);
4807	let b = _mm256_set1_epi32(`4`);
4808	let r = _mm256_mullo_epi32(a, b);
4809	let e = _mm256_set1_epi32(`8`);
4810	assert_eq_m256i(r, e);
4811	}
4812
4813	#[simd_test(enable = "avx2")]
4814	unsafe fn test_mm256_mulhrs_epi16() {
4815	let a = _mm256_set1_epi16(`2`);
4816	let b = _mm256_set1_epi16(`4`);
4817	let r = _mm256_mullo_epi16(a, b);
4818	let e = _mm256_set1_epi16(`8`);
4819	assert_eq_m256i(r, e);
4820	}
4821
4822	#[simd_test(enable = "avx2")]
4823	unsafe fn test_mm256_or_si256() {
4824	let a = _mm256_set1_epi8(`-1`);
4825	let b = _mm256_set1_epi8(`0`);
4826	let r = _mm256_or_si256(a, b);
4827	assert_eq_m256i(r, a);
4828	}
4829
4830	#[simd_test(enable = "avx2")]
4831	unsafe fn test_mm256_packs_epi16() {
4832	let a = _mm256_set1_epi16(`2`);
4833	let b = _mm256_set1_epi16(`4`);
4834	let r = _mm256_packs_epi16(a, b);
4835	#[rustfmt::skip]
4836	let e = _mm256_setr_epi8(
4837	`2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
4838	`4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
4839	`2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
4840	`4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
4841	);
4842
4843	assert_eq_m256i(r, e);
4844	}
4845
4846	#[simd_test(enable = "avx2")]
4847	unsafe fn test_mm256_packs_epi32() {
4848	let a = _mm256_set1_epi32(`2`);
4849	let b = _mm256_set1_epi32(`4`);
4850	let r = _mm256_packs_epi32(a, b);
4851	let e = _mm256_setr_epi16(`2`, `2`, `2`, `2`, `4`, `4`, `4`, `4`, `2`, `2`, `2`, `2`, `4`, `4`, `4`, `4`);
4852
4853	assert_eq_m256i(r, e);
4854	}
4855
4856	#[simd_test(enable = "avx2")]
4857	unsafe fn test_mm256_packus_epi16() {
4858	let a = _mm256_set1_epi16(`2`);
4859	let b = _mm256_set1_epi16(`4`);
4860	let r = _mm256_packus_epi16(a, b);
4861	#[rustfmt::skip]
4862	let e = _mm256_setr_epi8(
4863	`2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
4864	`4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
4865	`2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
4866	`4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
4867	);
4868
4869	assert_eq_m256i(r, e);
4870	}
4871
4872	#[simd_test(enable = "avx2")]
4873	unsafe fn test_mm256_packus_epi32() {
4874	let a = _mm256_set1_epi32(`2`);
4875	let b = _mm256_set1_epi32(`4`);
4876	let r = _mm256_packus_epi32(a, b);
4877	let e = _mm256_setr_epi16(`2`, `2`, `2`, `2`, `4`, `4`, `4`, `4`, `2`, `2`, `2`, `2`, `4`, `4`, `4`, `4`);
4878
4879	assert_eq_m256i(r, e);
4880	}
4881
4882	#[simd_test(enable = "avx2")]
4883	unsafe fn test_mm256_sad_epu8() {
4884	let a = _mm256_set1_epi8(`2`);
4885	let b = _mm256_set1_epi8(`4`);
4886	let r = _mm256_sad_epu8(a, b);
4887	let e = _mm256_set1_epi64x(`16`);
4888	assert_eq_m256i(r, e);
4889	}
4890
4891	#[simd_test(enable = "avx2")]
4892	unsafe fn test_mm256_shufflehi_epi16() {
4893	#[rustfmt::skip]
4894	let a = _mm256_setr_epi16(
4895	`0`, `1`, `2`, `3`, `11`, `22`, `33`, `44`,
4896	`4`, `5`, `6`, `7`, `55`, `66`, `77`, `88`,
4897	);
4898	#[rustfmt::skip]
4899	let e = _mm256_setr_epi16(
4900	`0`, `1`, `2`, `3`, `44`, `22`, `22`, `11`,
4901	`4`, `5`, `6`, `7`, `88`, `66`, `66`, `55`,
4902	);
4903	let r = _mm256_shufflehi_epi16::<`0b00_01_01_11`>(a);
4904	assert_eq_m256i(r, e);
4905	}
4906
4907	#[simd_test(enable = "avx2")]
4908	unsafe fn test_mm256_shufflelo_epi16() {
4909	#[rustfmt::skip]
4910	let a = _mm256_setr_epi16(
4911	`11`, `22`, `33`, `44`, `0`, `1`, `2`, `3`,
4912	`55`, `66`, `77`, `88`, `4`, `5`, `6`, `7`,
4913	);
4914	#[rustfmt::skip]
4915	let e = _mm256_setr_epi16(
4916	`44`, `22`, `22`, `11`, `0`, `1`, `2`, `3`,
4917	`88`, `66`, `66`, `55`, `4`, `5`, `6`, `7`,
4918	);
4919	let r = _mm256_shufflelo_epi16::<`0b00_01_01_11`>(a);
4920	assert_eq_m256i(r, e);
4921	}
4922
4923	#[simd_test(enable = "avx2")]
4924	unsafe fn test_mm256_sign_epi16() {
4925	let a = _mm256_set1_epi16(`2`);
4926	let b = _mm256_set1_epi16(`-1`);
4927	let r = _mm256_sign_epi16(a, b);
4928	let e = _mm256_set1_epi16(`-2`);
4929	assert_eq_m256i(r, e);
4930	}
4931
4932	#[simd_test(enable = "avx2")]
4933	unsafe fn test_mm256_sign_epi32() {
4934	let a = _mm256_set1_epi32(`2`);
4935	let b = _mm256_set1_epi32(`-1`);
4936	let r = _mm256_sign_epi32(a, b);
4937	let e = _mm256_set1_epi32(`-2`);
4938	assert_eq_m256i(r, e);
4939	}
4940
4941	#[simd_test(enable = "avx2")]
4942	unsafe fn test_mm256_sign_epi8() {
4943	let a = _mm256_set1_epi8(`2`);
4944	let b = _mm256_set1_epi8(`-1`);
4945	let r = _mm256_sign_epi8(a, b);
4946	let e = _mm256_set1_epi8(`-2`);
4947	assert_eq_m256i(r, e);
4948	}
4949
4950	#[simd_test(enable = "avx2")]
4951	unsafe fn test_mm256_sll_epi16() {
4952	let a = _mm256_set1_epi16(`0xFF`);
4953	let b = _mm_insert_epi16::<`0`>(_mm_set1_epi16(`0`), `4`);
4954	let r = _mm256_sll_epi16(a, b);
4955	assert_eq_m256i(r, _mm256_set1_epi16(`0xFF0`));
4956	}
4957
4958	#[simd_test(enable = "avx2")]
4959	unsafe fn test_mm256_sll_epi32() {
4960	let a = _mm256_set1_epi32(`0xFFFF`);
4961	let b = _mm_insert_epi32::<`0`>(_mm_set1_epi32(`0`), `4`);
4962	let r = _mm256_sll_epi32(a, b);
4963	assert_eq_m256i(r, _mm256_set1_epi32(`0xFFFF0`));
4964	}
4965
4966	#[simd_test(enable = "avx2")]
4967	unsafe fn test_mm256_sll_epi64() {
4968	let a = _mm256_set1_epi64x(`0xFFFFFFFF`);
4969	let b = _mm_insert_epi64::<`0`>(_mm_set1_epi64x(`0`), `4`);
4970	let r = _mm256_sll_epi64(a, b);
4971	assert_eq_m256i(r, _mm256_set1_epi64x(`0xFFFFFFFF0`));
4972	}
4973
4974	#[simd_test(enable = "avx2")]
4975	unsafe fn test_mm256_slli_epi16() {
4976	assert_eq_m256i(
4977	_mm256_slli_epi16::<`4`>(_mm256_set1_epi16(`0xFF`)),
4978	_mm256_set1_epi16(`0xFF0`),
4979	);
4980	}
4981
4982	#[simd_test(enable = "avx2")]
4983	unsafe fn test_mm256_slli_epi32() {
4984	assert_eq_m256i(
4985	_mm256_slli_epi32::<`4`>(_mm256_set1_epi32(`0xFFFF`)),
4986	_mm256_set1_epi32(`0xFFFF0`),
4987	);
4988	}
4989
4990	#[simd_test(enable = "avx2")]
4991	unsafe fn test_mm256_slli_epi64() {
4992	assert_eq_m256i(
4993	_mm256_slli_epi64::<`4`>(_mm256_set1_epi64x(`0xFFFFFFFF`)),
4994	_mm256_set1_epi64x(`0xFFFFFFFF0`),
4995	);
4996	}
4997
4998	#[simd_test(enable = "avx2")]
4999	unsafe fn test_mm256_slli_si256() {
5000	let a = _mm256_set1_epi64x(`0xFFFFFFFF`);
5001	let r = _mm256_slli_si256::<`3`>(a);
5002	assert_eq_m256i(r, _mm256_set1_epi64x(`0xFFFFFFFF000000`));
5003	}
5004
5005	#[simd_test(enable = "avx2")]
5006	unsafe fn test_mm_sllv_epi32() {
5007	let a = _mm_set1_epi32(`2`);
5008	let b = _mm_set1_epi32(`1`);
5009	let r = _mm_sllv_epi32(a, b);
5010	let e = _mm_set1_epi32(`4`);
5011	assert_eq_m128i(r, e);
5012	}
5013
5014	#[simd_test(enable = "avx2")]
5015	unsafe fn test_mm256_sllv_epi32() {
5016	let a = _mm256_set1_epi32(`2`);
5017	let b = _mm256_set1_epi32(`1`);
5018	let r = _mm256_sllv_epi32(a, b);
5019	let e = _mm256_set1_epi32(`4`);
5020	assert_eq_m256i(r, e);
5021	}
5022
5023	#[simd_test(enable = "avx2")]
5024	unsafe fn test_mm_sllv_epi64() {
5025	let a = _mm_set1_epi64x(`2`);
5026	let b = _mm_set1_epi64x(`1`);
5027	let r = _mm_sllv_epi64(a, b);
5028	let e = _mm_set1_epi64x(`4`);
5029	assert_eq_m128i(r, e);
5030	}
5031
5032	#[simd_test(enable = "avx2")]
5033	unsafe fn test_mm256_sllv_epi64() {
5034	let a = _mm256_set1_epi64x(`2`);
5035	let b = _mm256_set1_epi64x(`1`);
5036	let r = _mm256_sllv_epi64(a, b);
5037	let e = _mm256_set1_epi64x(`4`);
5038	assert_eq_m256i(r, e);
5039	}
5040
5041	#[simd_test(enable = "avx2")]
5042	unsafe fn test_mm256_sra_epi16() {
5043	let a = _mm256_set1_epi16(`-1`);
5044	let b = _mm_setr_epi16(`1`, `0`, `0`, `0`, `0`, `0`, `0`, `0`);
5045	let r = _mm256_sra_epi16(a, b);
5046	assert_eq_m256i(r, _mm256_set1_epi16(`-1`));
5047	}
5048
5049	#[simd_test(enable = "avx2")]
5050	unsafe fn test_mm256_sra_epi32() {
5051	let a = _mm256_set1_epi32(`-1`);
5052	let b = _mm_insert_epi32::<`0`>(_mm_set1_epi32(`0`), `1`);
5053	let r = _mm256_sra_epi32(a, b);
5054	assert_eq_m256i(r, _mm256_set1_epi32(`-1`));
5055	}
5056
5057	#[simd_test(enable = "avx2")]
5058	unsafe fn test_mm256_srai_epi16() {
5059	assert_eq_m256i(
5060	_mm256_srai_epi16::<`1`>(_mm256_set1_epi16(`-1`)),
5061	_mm256_set1_epi16(`-1`),
5062	);
5063	}
5064
5065	#[simd_test(enable = "avx2")]
5066	unsafe fn test_mm256_srai_epi32() {
5067	assert_eq_m256i(
5068	_mm256_srai_epi32::<`1`>(_mm256_set1_epi32(`-1`)),
5069	_mm256_set1_epi32(`-1`),
5070	);
5071	}
5072
5073	#[simd_test(enable = "avx2")]
5074	unsafe fn test_mm_srav_epi32() {
5075	let a = _mm_set1_epi32(`4`);
5076	let count = _mm_set1_epi32(`1`);
5077	let r = _mm_srav_epi32(a, count);
5078	let e = _mm_set1_epi32(`2`);
5079	assert_eq_m128i(r, e);
5080	}
5081
5082	#[simd_test(enable = "avx2")]
5083	unsafe fn test_mm256_srav_epi32() {
5084	let a = _mm256_set1_epi32(`4`);
5085	let count = _mm256_set1_epi32(`1`);
5086	let r = _mm256_srav_epi32(a, count);
5087	let e = _mm256_set1_epi32(`2`);
5088	assert_eq_m256i(r, e);
5089	}
5090
5091	#[simd_test(enable = "avx2")]
5092	unsafe fn test_mm256_srli_si256() {
5093	#[rustfmt::skip]
5094	let a = _mm256_setr_epi8(
5095	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
5096	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
5097	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
5098	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
5099	);
5100	let r = _mm256_srli_si256::<`3`>(a);
5101	#[rustfmt::skip]
5102	let e = _mm256_setr_epi8(
5103	`4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`,
5104	`12`, `13`, `14`, `15`, `16`, `0`, `0`, `0`,
5105	`20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`,
5106	`28`, `29`, `30`, `31`, `32`, `0`, `0`, `0`,
5107	);
5108	assert_eq_m256i(r, e);
5109	}
5110
5111	#[simd_test(enable = "avx2")]
5112	unsafe fn test_mm256_srl_epi16() {
5113	let a = _mm256_set1_epi16(`0xFF`);
5114	let b = _mm_insert_epi16::<`0`>(_mm_set1_epi16(`0`), `4`);
5115	let r = _mm256_srl_epi16(a, b);
5116	assert_eq_m256i(r, _mm256_set1_epi16(`0xF`));
5117	}
5118
5119	#[simd_test(enable = "avx2")]
5120	unsafe fn test_mm256_srl_epi32() {
5121	let a = _mm256_set1_epi32(`0xFFFF`);
5122	let b = _mm_insert_epi32::<`0`>(_mm_set1_epi32(`0`), `4`);
5123	let r = _mm256_srl_epi32(a, b);
5124	assert_eq_m256i(r, _mm256_set1_epi32(`0xFFF`));
5125	}
5126
5127	#[simd_test(enable = "avx2")]
5128	unsafe fn test_mm256_srl_epi64() {
5129	let a = _mm256_set1_epi64x(`0xFFFFFFFF`);
5130	let b = _mm_setr_epi64x(`4`, `0`);
5131	let r = _mm256_srl_epi64(a, b);
5132	assert_eq_m256i(r, _mm256_set1_epi64x(`0xFFFFFFF`));
5133	}
5134
5135	#[simd_test(enable = "avx2")]
5136	unsafe fn test_mm256_srli_epi16() {
5137	assert_eq_m256i(
5138	_mm256_srli_epi16::<`4`>(_mm256_set1_epi16(`0xFF`)),
5139	_mm256_set1_epi16(`0xF`),
5140	);
5141	}
5142
5143	#[simd_test(enable = "avx2")]
5144	unsafe fn test_mm256_srli_epi32() {
5145	assert_eq_m256i(
5146	_mm256_srli_epi32::<`4`>(_mm256_set1_epi32(`0xFFFF`)),
5147	_mm256_set1_epi32(`0xFFF`),
5148	);
5149	}
5150
5151	#[simd_test(enable = "avx2")]
5152	unsafe fn test_mm256_srli_epi64() {
5153	assert_eq_m256i(
5154	_mm256_srli_epi64::<`4`>(_mm256_set1_epi64x(`0xFFFFFFFF`)),
5155	_mm256_set1_epi64x(`0xFFFFFFF`),
5156	);
5157	}
5158
5159	#[simd_test(enable = "avx2")]
5160	unsafe fn test_mm_srlv_epi32() {
5161	let a = _mm_set1_epi32(`2`);
5162	let count = _mm_set1_epi32(`1`);
5163	let r = _mm_srlv_epi32(a, count);
5164	let e = _mm_set1_epi32(`1`);
5165	assert_eq_m128i(r, e);
5166	}
5167
5168	#[simd_test(enable = "avx2")]
5169	unsafe fn test_mm256_srlv_epi32() {
5170	let a = _mm256_set1_epi32(`2`);
5171	let count = _mm256_set1_epi32(`1`);
5172	let r = _mm256_srlv_epi32(a, count);
5173	let e = _mm256_set1_epi32(`1`);
5174	assert_eq_m256i(r, e);
5175	}
5176
5177	#[simd_test(enable = "avx2")]
5178	unsafe fn test_mm_srlv_epi64() {
5179	let a = _mm_set1_epi64x(`2`);
5180	let count = _mm_set1_epi64x(`1`);
5181	let r = _mm_srlv_epi64(a, count);
5182	let e = _mm_set1_epi64x(`1`);
5183	assert_eq_m128i(r, e);
5184	}
5185
5186	#[simd_test(enable = "avx2")]
5187	unsafe fn test_mm256_srlv_epi64() {
5188	let a = _mm256_set1_epi64x(`2`);
5189	let count = _mm256_set1_epi64x(`1`);
5190	let r = _mm256_srlv_epi64(a, count);
5191	let e = _mm256_set1_epi64x(`1`);
5192	assert_eq_m256i(r, e);
5193	}
5194
5195	#[simd_test(enable = "avx2")]
5196	unsafe fn test_mm256_sub_epi16() {
5197	let a = _mm256_set1_epi16(`4`);
5198	let b = _mm256_set1_epi16(`2`);
5199	let r = _mm256_sub_epi16(a, b);
5200	assert_eq_m256i(r, b);
5201	}
5202
5203	#[simd_test(enable = "avx2")]
5204	unsafe fn test_mm256_sub_epi32() {
5205	let a = _mm256_set1_epi32(`4`);
5206	let b = _mm256_set1_epi32(`2`);
5207	let r = _mm256_sub_epi32(a, b);
5208	assert_eq_m256i(r, b);
5209	}
5210
5211	#[simd_test(enable = "avx2")]
5212	unsafe fn test_mm256_sub_epi64() {
5213	let a = _mm256_set1_epi64x(`4`);
5214	let b = _mm256_set1_epi64x(`2`);
5215	let r = _mm256_sub_epi64(a, b);
5216	assert_eq_m256i(r, b);
5217	}
5218
5219	#[simd_test(enable = "avx2")]
5220	unsafe fn test_mm256_sub_epi8() {
5221	let a = _mm256_set1_epi8(`4`);
5222	let b = _mm256_set1_epi8(`2`);
5223	let r = _mm256_sub_epi8(a, b);
5224	assert_eq_m256i(r, b);
5225	}
5226
5227	#[simd_test(enable = "avx2")]
5228	unsafe fn test_mm256_subs_epi16() {
5229	let a = _mm256_set1_epi16(`4`);
5230	let b = _mm256_set1_epi16(`2`);
5231	let r = _mm256_subs_epi16(a, b);
5232	assert_eq_m256i(r, b);
5233	}
5234
5235	#[simd_test(enable = "avx2")]
5236	unsafe fn test_mm256_subs_epi8() {
5237	let a = _mm256_set1_epi8(`4`);
5238	let b = _mm256_set1_epi8(`2`);
5239	let r = _mm256_subs_epi8(a, b);
5240	assert_eq_m256i(r, b);
5241	}
5242
5243	#[simd_test(enable = "avx2")]
5244	unsafe fn test_mm256_subs_epu16() {
5245	let a = _mm256_set1_epi16(`4`);
5246	let b = _mm256_set1_epi16(`2`);
5247	let r = _mm256_subs_epu16(a, b);
5248	assert_eq_m256i(r, b);
5249	}
5250
5251	#[simd_test(enable = "avx2")]
5252	unsafe fn test_mm256_subs_epu8() {
5253	let a = _mm256_set1_epi8(`4`);
5254	let b = _mm256_set1_epi8(`2`);
5255	let r = _mm256_subs_epu8(a, b);
5256	assert_eq_m256i(r, b);
5257	}
5258
5259	#[simd_test(enable = "avx2")]
5260	unsafe fn test_mm256_xor_si256() {
5261	let a = _mm256_set1_epi8(`5`);
5262	let b = _mm256_set1_epi8(`3`);
5263	let r = _mm256_xor_si256(a, b);
5264	assert_eq_m256i(r, _mm256_set1_epi8(`6`));
5265	}
5266
5267	#[simd_test(enable = "avx2")]
5268	unsafe fn test_mm256_alignr_epi8() {
5269	#[rustfmt::skip]
5270	let a = _mm256_setr_epi8(
5271	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
5272	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
5273	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
5274	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
5275	);
5276	#[rustfmt::skip]
5277	let b = _mm256_setr_epi8(
5278	`-1`, `-2`, `-3`, `-4`, `-5`, `-6`, `-7`, `-8`,
5279	`-9`, `-10`, `-11`, `-12`, `-13`, `-14`, `-15`, `-16`,
5280	`-17`, `-18`, `-19`, `-20`, `-21`, `-22`, `-23`, `-24`,
5281	`-25`, `-26`, `-27`, `-28`, `-29`, `-30`, `-31`, `-32`,
5282	);
5283	let r = _mm256_alignr_epi8::<`33`>(a, b);
5284	assert_eq_m256i(r, _mm256_set1_epi8(`0`));
5285
5286	let r = _mm256_alignr_epi8::<`17`>(a, b);
5287	#[rustfmt::skip]
5288	let expected = _mm256_setr_epi8(
5289	`2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`,
5290	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `0`,
5291	`18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`,
5292	`26`, `27`, `28`, `29`, `30`, `31`, `32`, `0`,
5293	);
5294	assert_eq_m256i(r, expected);
5295
5296	let r = _mm256_alignr_epi8::<`4`>(a, b);
5297	#[rustfmt::skip]
5298	let expected = _mm256_setr_epi8(
5299	`-5`, `-6`, `-7`, `-8`, `-9`, `-10`, `-11`, `-12`,
5300	`-13`, `-14`, `-15`, `-16`, `1`, `2`, `3`, `4`,
5301	`-21`, `-22`, `-23`, `-24`, `-25`, `-26`, `-27`, `-28`,
5302	`-29`, `-30`, `-31`, `-32`, `17`, `18`, `19`, `20`,
5303	);
5304	assert_eq_m256i(r, expected);
5305
5306	#[rustfmt::skip]
5307	let expected = _mm256_setr_epi8(
5308	`-1`, `-2`, `-3`, `-4`, `-5`, `-6`, `-7`, `-8`,
5309	`-9`, `-10`, `-11`, `-12`, `-13`, `-14`, `-15`, `-16`, `-17`,
5310	`-18`, `-19`, `-20`, `-21`, `-22`, `-23`, `-24`, `-25`,
5311	`-26`, `-27`, `-28`, `-29`, `-30`, `-31`, `-32`,
5312	);
5313	let r = _mm256_alignr_epi8::<`16`>(a, b);
5314	assert_eq_m256i(r, expected);
5315
5316	let r = _mm256_alignr_epi8::<`15`>(a, b);
5317	#[rustfmt::skip]
5318	let expected = _mm256_setr_epi8(
5319	`-16`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
5320	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
5321	`-32`, `17`, `18`, `19`, `20`, `21`, `22`, `23`,
5322	`24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
5323	);
5324	assert_eq_m256i(r, expected);
5325
5326	let r = _mm256_alignr_epi8::<`0`>(a, b);
5327	assert_eq_m256i(r, b);
5328	}
5329
5330	#[simd_test(enable = "avx2")]
5331	unsafe fn test_mm256_shuffle_epi8() {
5332	#[rustfmt::skip]
5333	let a = _mm256_setr_epi8(
5334	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`,
5335	`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`,
5336	`17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
5337	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
5338	);
5339	#[rustfmt::skip]
5340	let b = _mm256_setr_epi8(
5341	`4`, `128u8` as i8, `4`, `3`, `24`, `12`, `6`, `19`,
5342	`12`, `5`, `5`, `10`, `4`, `1`, `8`, `0`,
5343	`4`, `128u8` as i8, `4`, `3`, `24`, `12`, `6`, `19`,
5344	`12`, `5`, `5`, `10`, `4`, `1`, `8`, `0`,
5345	);
5346	#[rustfmt::skip]
5347	let expected = _mm256_setr_epi8(
5348	`5`, `0`, `5`, `4`, `9`, `13`, `7`, `4`,
5349	`13`, `6`, `6`, `11`, `5`, `2`, `9`, `1`,
5350	`21`, `0`, `21`, `20`, `25`, `29`, `23`, `20`,
5351	`29`, `22`, `22`, `27`, `21`, `18`, `25`, `17`,
5352	);
5353	let r = _mm256_shuffle_epi8(a, b);
5354	assert_eq_m256i(r, expected);
5355	}
5356
5357	#[simd_test(enable = "avx2")]
5358	unsafe fn test_mm256_permutevar8x32_epi32() {
5359	let a = _mm256_setr_epi32(`100`, `200`, `300`, `400`, `500`, `600`, `700`, `800`);
5360	let b = _mm256_setr_epi32(`5`, `0`, `5`, `1`, `7`, `6`, `3`, `4`);
5361	let expected = _mm256_setr_epi32(`600`, `100`, `600`, `200`, `800`, `700`, `400`, `500`);
5362	let r = _mm256_permutevar8x32_epi32(a, b);
5363	assert_eq_m256i(r, expected);
5364	}
5365
5366	#[simd_test(enable = "avx2")]
5367	unsafe fn test_mm256_permute4x64_epi64() {
5368	let a = _mm256_setr_epi64x(`100`, `200`, `300`, `400`);
5369	let expected = _mm256_setr_epi64x(`400`, `100`, `200`, `100`);
5370	let r = _mm256_permute4x64_epi64::<`0b00010011`>(a);
5371	assert_eq_m256i(r, expected);
5372	}
5373
5374	#[simd_test(enable = "avx2")]
5375	unsafe fn test_mm256_permute2x128_si256() {
5376	let a = _mm256_setr_epi64x(`100`, `200`, `500`, `600`);
5377	let b = _mm256_setr_epi64x(`300`, `400`, `700`, `800`);
5378	let r = _mm256_permute2x128_si256::<`0b00_01_00_11`>(a, b);
5379	let e = _mm256_setr_epi64x(`700`, `800`, `500`, `600`);
5380	assert_eq_m256i(r, e);
5381	}
5382
5383	#[simd_test(enable = "avx2")]
5384	unsafe fn test_mm256_permute4x64_pd() {
5385	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
5386	let r = _mm256_permute4x64_pd::<`0b00_01_00_11`>(a);
5387	let e = _mm256_setr_pd(`4.`, `1.`, `2.`, `1.`);
5388	assert_eq_m256d(r, e);
5389	}
5390
5391	#[simd_test(enable = "avx2")]
5392	unsafe fn test_mm256_permutevar8x32_ps() {
5393	let a = _mm256_setr_ps(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
5394	let b = _mm256_setr_epi32(`5`, `0`, `5`, `1`, `7`, `6`, `3`, `4`);
5395	let r = _mm256_permutevar8x32_ps(a, b);
5396	let e = _mm256_setr_ps(`6.`, `1.`, `6.`, `2.`, `8.`, `7.`, `4.`, `5.`);
5397	assert_eq_m256(r, e);
5398	}
5399
5400	#[simd_test(enable = "avx2")]
5401	unsafe fn test_mm_i32gather_epi32() {
5402	let mut arr = [`0i32`; `128`];
5403	for i in `0`..`128i32` {
5404	arr[i as usize] = i;
5405	}
5406	// A multiplier of 4 is word-addressing
5407	let r = _mm_i32gather_epi32::<`4`>(arr.as_ptr(), _mm_setr_epi32(`0`, `16`, `32`, `48`));
5408	assert_eq_m128i(r, _mm_setr_epi32(`0`, `16`, `32`, `48`));
5409	}
5410
5411	#[simd_test(enable = "avx2")]
5412	unsafe fn test_mm_mask_i32gather_epi32() {
5413	let mut arr = [`0i32`; `128`];
5414	for i in `0`..`128i32` {
5415	arr[i as usize] = i;
5416	}
5417	// A multiplier of 4 is word-addressing
5418	let r = _mm_mask_i32gather_epi32::<`4`>(
5419	_mm_set1_epi32(`256`),
5420	arr.as_ptr(),
5421	_mm_setr_epi32(`0`, `16`, `64`, `96`),
5422	_mm_setr_epi32(`-1`, `-1`, `-1`, `0`),
5423	);
5424	assert_eq_m128i(r, _mm_setr_epi32(`0`, `16`, `64`, `256`));
5425	}
5426
5427	#[simd_test(enable = "avx2")]
5428	unsafe fn test_mm256_i32gather_epi32() {
5429	let mut arr = [`0i32`; `128`];
5430	for i in `0`..`128i32` {
5431	arr[i as usize] = i;
5432	}
5433	// A multiplier of 4 is word-addressing
5434	let r =
5435	_mm256_i32gather_epi32::<`4`>(arr.as_ptr(), _mm256_setr_epi32(`0`, `16`, `32`, `48`, `1`, `2`, `3`, `4`));
5436	assert_eq_m256i(r, _mm256_setr_epi32(`0`, `16`, `32`, `48`, `1`, `2`, `3`, `4`));
5437	}
5438
5439	#[simd_test(enable = "avx2")]
5440	unsafe fn test_mm256_mask_i32gather_epi32() {
5441	let mut arr = [`0i32`; `128`];
5442	for i in `0`..`128i32` {
5443	arr[i as usize] = i;
5444	}
5445	// A multiplier of 4 is word-addressing
5446	let r = _mm256_mask_i32gather_epi32::<`4`>(
5447	_mm256_set1_epi32(`256`),
5448	arr.as_ptr(),
5449	_mm256_setr_epi32(`0`, `16`, `64`, `96`, `0`, `0`, `0`, `0`),
5450	_mm256_setr_epi32(`-1`, `-1`, `-1`, `0`, `0`, `0`, `0`, `0`),
5451	);
5452	assert_eq_m256i(r, _mm256_setr_epi32(`0`, `16`, `64`, `256`, `256`, `256`, `256`, `256`));
5453	}
5454
5455	#[simd_test(enable = "avx2")]
5456	unsafe fn test_mm_i32gather_ps() {
5457	let mut arr = [`0.0f32`; `128`];
5458	let mut j = `0.0`;
5459	for i in `0`..`128usize` {
5460	arr[i] = j;
5461	j += `1.0`;
5462	}
5463	// A multiplier of 4 is word-addressing for f32s
5464	let r = _mm_i32gather_ps::<`4`>(arr.as_ptr(), _mm_setr_epi32(`0`, `16`, `32`, `48`));
5465	assert_eq_m128(r, _mm_setr_ps(`0.0`, `16.0`, `32.0`, `48.0`));
5466	}
5467
5468	#[simd_test(enable = "avx2")]
5469	unsafe fn test_mm_mask_i32gather_ps() {
5470	let mut arr = [`0.0f32`; `128`];
5471	let mut j = `0.0`;
5472	for i in `0`..`128usize` {
5473	arr[i] = j;
5474	j += `1.0`;
5475	}
5476	// A multiplier of 4 is word-addressing for f32s
5477	let r = _mm_mask_i32gather_ps::<`4`>(
5478	_mm_set1_ps(`256.0`),
5479	arr.as_ptr(),
5480	_mm_setr_epi32(`0`, `16`, `64`, `96`),
5481	_mm_setr_ps(`-1.0`, `-1.0`, `-1.0`, `0.0`),
5482	);
5483	assert_eq_m128(r, _mm_setr_ps(`0.0`, `16.0`, `64.0`, `256.0`));
5484	}
5485
5486	#[simd_test(enable = "avx2")]
5487	unsafe fn test_mm256_i32gather_ps() {
5488	let mut arr = [`0.0f32`; `128`];
5489	let mut j = `0.0`;
5490	for i in `0`..`128usize` {
5491	arr[i] = j;
5492	j += `1.0`;
5493	}
5494	// A multiplier of 4 is word-addressing for f32s
5495	let r =
5496	_mm256_i32gather_ps::<`4`>(arr.as_ptr(), _mm256_setr_epi32(`0`, `16`, `32`, `48`, `1`, `2`, `3`, `4`));
5497	assert_eq_m256(r, _mm256_setr_ps(`0.0`, `16.0`, `32.0`, `48.0`, `1.0`, `2.0`, `3.0`, `4.0`));
5498	}
5499
5500	#[simd_test(enable = "avx2")]
5501	unsafe fn test_mm256_mask_i32gather_ps() {
5502	let mut arr = [`0.0f32`; `128`];
5503	let mut j = `0.0`;
5504	for i in `0`..`128usize` {
5505	arr[i] = j;
5506	j += `1.0`;
5507	}
5508	// A multiplier of 4 is word-addressing for f32s
5509	let r = _mm256_mask_i32gather_ps::<`4`>(
5510	_mm256_set1_ps(`256.0`),
5511	arr.as_ptr(),
5512	_mm256_setr_epi32(`0`, `16`, `64`, `96`, `0`, `0`, `0`, `0`),
5513	_mm256_setr_ps(`-1.0`, `-1.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`),
5514	);
5515	assert_eq_m256(
5516	r,
5517	_mm256_setr_ps(`0.0`, `16.0`, `64.0`, `256.0`, `256.0`, `256.0`, `256.0`, `256.0`),
5518	);
5519	}
5520
5521	#[simd_test(enable = "avx2")]
5522	unsafe fn test_mm_i32gather_epi64() {
5523	let mut arr = [`0i64`; `128`];
5524	for i in `0`..`128i64` {
5525	arr[i as usize] = i;
5526	}
5527	// A multiplier of 8 is word-addressing for i64s
5528	let r = _mm_i32gather_epi64::<`8`>(arr.as_ptr(), _mm_setr_epi32(`0`, `16`, `0`, `0`));
5529	assert_eq_m128i(r, _mm_setr_epi64x(`0`, `16`));
5530	}
5531
5532	#[simd_test(enable = "avx2")]
5533	unsafe fn test_mm_mask_i32gather_epi64() {
5534	let mut arr = [`0i64`; `128`];
5535	for i in `0`..`128i64` {
5536	arr[i as usize] = i;
5537	}
5538	// A multiplier of 8 is word-addressing for i64s
5539	let r = _mm_mask_i32gather_epi64::<`8`>(
5540	_mm_set1_epi64x(`256`),
5541	arr.as_ptr(),
5542	_mm_setr_epi32(`16`, `16`, `16`, `16`),
5543	_mm_setr_epi64x(`-1`, `0`),
5544	);
5545	assert_eq_m128i(r, _mm_setr_epi64x(`16`, `256`));
5546	}
5547
5548	#[simd_test(enable = "avx2")]
5549	unsafe fn test_mm256_i32gather_epi64() {
5550	let mut arr = [`0i64`; `128`];
5551	for i in `0`..`128i64` {
5552	arr[i as usize] = i;
5553	}
5554	// A multiplier of 8 is word-addressing for i64s
5555	let r = _mm256_i32gather_epi64::<`8`>(arr.as_ptr(), _mm_setr_epi32(`0`, `16`, `32`, `48`));
5556	assert_eq_m256i(r, _mm256_setr_epi64x(`0`, `16`, `32`, `48`));
5557	}
5558
5559	#[simd_test(enable = "avx2")]
5560	unsafe fn test_mm256_mask_i32gather_epi64() {
5561	let mut arr = [`0i64`; `128`];
5562	for i in `0`..`128i64` {
5563	arr[i as usize] = i;
5564	}
5565	// A multiplier of 8 is word-addressing for i64s
5566	let r = _mm256_mask_i32gather_epi64::<`8`>(
5567	_mm256_set1_epi64x(`256`),
5568	arr.as_ptr(),
5569	_mm_setr_epi32(`0`, `16`, `64`, `96`),
5570	_mm256_setr_epi64x(`-1`, `-1`, `-1`, `0`),
5571	);
5572	assert_eq_m256i(r, _mm256_setr_epi64x(`0`, `16`, `64`, `256`));
5573	}
5574
5575	#[simd_test(enable = "avx2")]
5576	unsafe fn test_mm_i32gather_pd() {
5577	let mut arr = [`0.0f64`; `128`];
5578	let mut j = `0.0`;
5579	for i in `0`..`128usize` {
5580	arr[i] = j;
5581	j += `1.0`;
5582	}
5583	// A multiplier of 8 is word-addressing for f64s
5584	let r = _mm_i32gather_pd::<`8`>(arr.as_ptr(), _mm_setr_epi32(`0`, `16`, `0`, `0`));
5585	assert_eq_m128d(r, _mm_setr_pd(`0.0`, `16.0`));
5586	}
5587
5588	#[simd_test(enable = "avx2")]
5589	unsafe fn test_mm_mask_i32gather_pd() {
5590	let mut arr = [`0.0f64`; `128`];
5591	let mut j = `0.0`;
5592	for i in `0`..`128usize` {
5593	arr[i] = j;
5594	j += `1.0`;
5595	}
5596	// A multiplier of 8 is word-addressing for f64s
5597	let r = _mm_mask_i32gather_pd::<`8`>(
5598	_mm_set1_pd(`256.0`),
5599	arr.as_ptr(),
5600	_mm_setr_epi32(`16`, `16`, `16`, `16`),
5601	_mm_setr_pd(`-1.0`, `0.0`),
5602	);
5603	assert_eq_m128d(r, _mm_setr_pd(`16.0`, `256.0`));
5604	}
5605
5606	#[simd_test(enable = "avx2")]
5607	unsafe fn test_mm256_i32gather_pd() {
5608	let mut arr = [`0.0f64`; `128`];
5609	let mut j = `0.0`;
5610	for i in `0`..`128usize` {
5611	arr[i] = j;
5612	j += `1.0`;
5613	}
5614	// A multiplier of 8 is word-addressing for f64s
5615	let r = _mm256_i32gather_pd::<`8`>(arr.as_ptr(), _mm_setr_epi32(`0`, `16`, `32`, `48`));
5616	assert_eq_m256d(r, _mm256_setr_pd(`0.0`, `16.0`, `32.0`, `48.0`));
5617	}
5618
5619	#[simd_test(enable = "avx2")]
5620	unsafe fn test_mm256_mask_i32gather_pd() {
5621	let mut arr = [`0.0f64`; `128`];
5622	let mut j = `0.0`;
5623	for i in `0`..`128usize` {
5624	arr[i] = j;
5625	j += `1.0`;
5626	}
5627	// A multiplier of 8 is word-addressing for f64s
5628	let r = _mm256_mask_i32gather_pd::<`8`>(
5629	_mm256_set1_pd(`256.0`),
5630	arr.as_ptr(),
5631	_mm_setr_epi32(`0`, `16`, `64`, `96`),
5632	_mm256_setr_pd(`-1.0`, `-1.0`, `-1.0`, `0.0`),
5633	);
5634	assert_eq_m256d(r, _mm256_setr_pd(`0.0`, `16.0`, `64.0`, `256.0`));
5635	}
5636
5637	#[simd_test(enable = "avx2")]
5638	unsafe fn test_mm_i64gather_epi32() {
5639	let mut arr = [`0i32`; `128`];
5640	for i in `0`..`128i32` {
5641	arr[i as usize] = i;
5642	}
5643	// A multiplier of 4 is word-addressing
5644	let r = _mm_i64gather_epi32::<`4`>(arr.as_ptr(), _mm_setr_epi64x(`0`, `16`));
5645	assert_eq_m128i(r, _mm_setr_epi32(`0`, `16`, `0`, `0`));
5646	}
5647
5648	#[simd_test(enable = "avx2")]
5649	unsafe fn test_mm_mask_i64gather_epi32() {
5650	let mut arr = [`0i32`; `128`];
5651	for i in `0`..`128i32` {
5652	arr[i as usize] = i;
5653	}
5654	// A multiplier of 4 is word-addressing
5655	let r = _mm_mask_i64gather_epi32::<`4`>(
5656	_mm_set1_epi32(`256`),
5657	arr.as_ptr(),
5658	_mm_setr_epi64x(`0`, `16`),
5659	_mm_setr_epi32(`-1`, `0`, `-1`, `0`),
5660	);
5661	assert_eq_m128i(r, _mm_setr_epi32(`0`, `256`, `0`, `0`));
5662	}
5663
5664	#[simd_test(enable = "avx2")]
5665	unsafe fn test_mm256_i64gather_epi32() {
5666	let mut arr = [`0i32`; `128`];
5667	for i in `0`..`128i32` {
5668	arr[i as usize] = i;
5669	}
5670	// A multiplier of 4 is word-addressing
5671	let r = _mm256_i64gather_epi32::<`4`>(arr.as_ptr(), _mm256_setr_epi64x(`0`, `16`, `32`, `48`));
5672	assert_eq_m128i(r, _mm_setr_epi32(`0`, `16`, `32`, `48`));
5673	}
5674
5675	#[simd_test(enable = "avx2")]
5676	unsafe fn test_mm256_mask_i64gather_epi32() {
5677	let mut arr = [`0i32`; `128`];
5678	for i in `0`..`128i32` {
5679	arr[i as usize] = i;
5680	}
5681	// A multiplier of 4 is word-addressing
5682	let r = _mm256_mask_i64gather_epi32::<`4`>(
5683	_mm_set1_epi32(`256`),
5684	arr.as_ptr(),
5685	_mm256_setr_epi64x(`0`, `16`, `64`, `96`),
5686	_mm_setr_epi32(`-1`, `-1`, `-1`, `0`),
5687	);
5688	assert_eq_m128i(r, _mm_setr_epi32(`0`, `16`, `64`, `256`));
5689	}
5690
5691	#[simd_test(enable = "avx2")]
5692	unsafe fn test_mm_i64gather_ps() {
5693	let mut arr = [`0.0f32`; `128`];
5694	let mut j = `0.0`;
5695	for i in `0`..`128usize` {
5696	arr[i] = j;
5697	j += `1.0`;
5698	}
5699	// A multiplier of 4 is word-addressing for f32s
5700	let r = _mm_i64gather_ps::<`4`>(arr.as_ptr(), _mm_setr_epi64x(`0`, `16`));
5701	assert_eq_m128(r, _mm_setr_ps(`0.0`, `16.0`, `0.0`, `0.0`));
5702	}
5703
5704	#[simd_test(enable = "avx2")]
5705	unsafe fn test_mm_mask_i64gather_ps() {
5706	let mut arr = [`0.0f32`; `128`];
5707	let mut j = `0.0`;
5708	for i in `0`..`128usize` {
5709	arr[i] = j;
5710	j += `1.0`;
5711	}
5712	// A multiplier of 4 is word-addressing for f32s
5713	let r = _mm_mask_i64gather_ps::<`4`>(
5714	_mm_set1_ps(`256.0`),
5715	arr.as_ptr(),
5716	_mm_setr_epi64x(`0`, `16`),
5717	_mm_setr_ps(`-1.0`, `0.0`, `-1.0`, `0.0`),
5718	);
5719	assert_eq_m128(r, _mm_setr_ps(`0.0`, `256.0`, `0.0`, `0.0`));
5720	}
5721
5722	#[simd_test(enable = "avx2")]
5723	unsafe fn test_mm256_i64gather_ps() {
5724	let mut arr = [`0.0f32`; `128`];
5725	let mut j = `0.0`;
5726	for i in `0`..`128usize` {
5727	arr[i] = j;
5728	j += `1.0`;
5729	}
5730	// A multiplier of 4 is word-addressing for f32s
5731	let r = _mm256_i64gather_ps::<`4`>(arr.as_ptr(), _mm256_setr_epi64x(`0`, `16`, `32`, `48`));
5732	assert_eq_m128(r, _mm_setr_ps(`0.0`, `16.0`, `32.0`, `48.0`));
5733	}
5734
5735	#[simd_test(enable = "avx2")]
5736	unsafe fn test_mm256_mask_i64gather_ps() {
5737	let mut arr = [`0.0f32`; `128`];
5738	let mut j = `0.0`;
5739	for i in `0`..`128usize` {
5740	arr[i] = j;
5741	j += `1.0`;
5742	}
5743	// A multiplier of 4 is word-addressing for f32s
5744	let r = _mm256_mask_i64gather_ps::<`4`>(
5745	_mm_set1_ps(`256.0`),
5746	arr.as_ptr(),
5747	_mm256_setr_epi64x(`0`, `16`, `64`, `96`),
5748	_mm_setr_ps(`-1.0`, `-1.0`, `-1.0`, `0.0`),
5749	);
5750	assert_eq_m128(r, _mm_setr_ps(`0.0`, `16.0`, `64.0`, `256.0`));
5751	}
5752
5753	#[simd_test(enable = "avx2")]
5754	unsafe fn test_mm_i64gather_epi64() {
5755	let mut arr = [`0i64`; `128`];
5756	for i in `0`..`128i64` {
5757	arr[i as usize] = i;
5758	}
5759	// A multiplier of 8 is word-addressing for i64s
5760	let r = _mm_i64gather_epi64::<`8`>(arr.as_ptr(), _mm_setr_epi64x(`0`, `16`));
5761	assert_eq_m128i(r, _mm_setr_epi64x(`0`, `16`));
5762	}
5763
5764	#[simd_test(enable = "avx2")]
5765	unsafe fn test_mm_mask_i64gather_epi64() {
5766	let mut arr = [`0i64`; `128`];
5767	for i in `0`..`128i64` {
5768	arr[i as usize] = i;
5769	}
5770	// A multiplier of 8 is word-addressing for i64s
5771	let r = _mm_mask_i64gather_epi64::<`8`>(
5772	_mm_set1_epi64x(`256`),
5773	arr.as_ptr(),
5774	_mm_setr_epi64x(`16`, `16`),
5775	_mm_setr_epi64x(`-1`, `0`),
5776	);
5777	assert_eq_m128i(r, _mm_setr_epi64x(`16`, `256`));
5778	}
5779
5780	#[simd_test(enable = "avx2")]
5781	unsafe fn test_mm256_i64gather_epi64() {
5782	let mut arr = [`0i64`; `128`];
5783	for i in `0`..`128i64` {
5784	arr[i as usize] = i;
5785	}
5786	// A multiplier of 8 is word-addressing for i64s
5787	let r = _mm256_i64gather_epi64::<`8`>(arr.as_ptr(), _mm256_setr_epi64x(`0`, `16`, `32`, `48`));
5788	assert_eq_m256i(r, _mm256_setr_epi64x(`0`, `16`, `32`, `48`));
5789	}
5790
5791	#[simd_test(enable = "avx2")]
5792	unsafe fn test_mm256_mask_i64gather_epi64() {
5793	let mut arr = [`0i64`; `128`];
5794	for i in `0`..`128i64` {
5795	arr[i as usize] = i;
5796	}
5797	// A multiplier of 8 is word-addressing for i64s
5798	let r = _mm256_mask_i64gather_epi64::<`8`>(
5799	_mm256_set1_epi64x(`256`),
5800	arr.as_ptr(),
5801	_mm256_setr_epi64x(`0`, `16`, `64`, `96`),
5802	_mm256_setr_epi64x(`-1`, `-1`, `-1`, `0`),
5803	);
5804	assert_eq_m256i(r, _mm256_setr_epi64x(`0`, `16`, `64`, `256`));
5805	}
5806
5807	#[simd_test(enable = "avx2")]
5808	unsafe fn test_mm_i64gather_pd() {
5809	let mut arr = [`0.0f64`; `128`];
5810	let mut j = `0.0`;
5811	for i in `0`..`128usize` {
5812	arr[i] = j;
5813	j += `1.0`;
5814	}
5815	// A multiplier of 8 is word-addressing for f64s
5816	let r = _mm_i64gather_pd::<`8`>(arr.as_ptr(), _mm_setr_epi64x(`0`, `16`));
5817	assert_eq_m128d(r, _mm_setr_pd(`0.0`, `16.0`));
5818	}
5819
5820	#[simd_test(enable = "avx2")]
5821	unsafe fn test_mm_mask_i64gather_pd() {
5822	let mut arr = [`0.0f64`; `128`];
5823	let mut j = `0.0`;
5824	for i in `0`..`128usize` {
5825	arr[i] = j;
5826	j += `1.0`;
5827	}
5828	// A multiplier of 8 is word-addressing for f64s
5829	let r = _mm_mask_i64gather_pd::<`8`>(
5830	_mm_set1_pd(`256.0`),
5831	arr.as_ptr(),
5832	_mm_setr_epi64x(`16`, `16`),
5833	_mm_setr_pd(`-1.0`, `0.0`),
5834	);
5835	assert_eq_m128d(r, _mm_setr_pd(`16.0`, `256.0`));
5836	}
5837
5838	#[simd_test(enable = "avx2")]
5839	unsafe fn test_mm256_i64gather_pd() {
5840	let mut arr = [`0.0f64`; `128`];
5841	let mut j = `0.0`;
5842	for i in `0`..`128usize` {
5843	arr[i] = j;
5844	j += `1.0`;
5845	}
5846	// A multiplier of 8 is word-addressing for f64s
5847	let r = _mm256_i64gather_pd::<`8`>(arr.as_ptr(), _mm256_setr_epi64x(`0`, `16`, `32`, `48`));
5848	assert_eq_m256d(r, _mm256_setr_pd(`0.0`, `16.0`, `32.0`, `48.0`));
5849	}
5850
5851	#[simd_test(enable = "avx2")]
5852	unsafe fn test_mm256_mask_i64gather_pd() {
5853	let mut arr = [`0.0f64`; `128`];
5854	let mut j = `0.0`;
5855	for i in `0`..`128usize` {
5856	arr[i] = j;
5857	j += `1.0`;
5858	}
5859	// A multiplier of 8 is word-addressing for f64s
5860	let r = _mm256_mask_i64gather_pd::<`8`>(
5861	_mm256_set1_pd(`256.0`),
5862	arr.as_ptr(),
5863	_mm256_setr_epi64x(`0`, `16`, `64`, `96`),
5864	_mm256_setr_pd(`-1.0`, `-1.0`, `-1.0`, `0.0`),
5865	);
5866	assert_eq_m256d(r, _mm256_setr_pd(`0.0`, `16.0`, `64.0`, `256.0`));
5867	}
5868
5869	#[simd_test(enable = "avx")]
5870	unsafe fn test_mm256_extract_epi8() {
5871	#[rustfmt::skip]
5872	let a = _mm256_setr_epi8(
5873	`-1`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
5874	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
5875	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`,
5876	`24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`
5877	);
5878	let r1 = _mm256_extract_epi8::<`0`>(a);
5879	let r2 = _mm256_extract_epi8::<`3`>(a);
5880	assert_eq!(r1, `0xFF`);
5881	assert_eq!(r2, `3`);
5882	}
5883
5884	#[simd_test(enable = "avx2")]
5885	unsafe fn test_mm256_extract_epi16() {
5886	#[rustfmt::skip]
5887	let a = _mm256_setr_epi16(
5888	`-1`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
5889	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
5890	);
5891	let r1 = _mm256_extract_epi16::<`0`>(a);
5892	let r2 = _mm256_extract_epi16::<`3`>(a);
5893	assert_eq!(r1, `0xFFFF`);
5894	assert_eq!(r2, `3`);
5895	}
5896
5897	#[simd_test(enable = "avx2")]
5898	unsafe fn test_mm256_extract_epi32() {
5899	let a = _mm256_setr_epi32(`-1`, `1`, `2`, `3`, `4`, `5`, `6`, `7`);
5900	let r1 = _mm256_extract_epi32::<`0`>(a);
5901	let r2 = _mm256_extract_epi32::<`3`>(a);
5902	assert_eq!(r1, `-1`);
5903	assert_eq!(r2, `3`);
5904	}
5905
5906	#[simd_test(enable = "avx2")]
5907	unsafe fn test_mm256_cvtsd_f64() {
5908	let a = _mm256_setr_pd(`1.`, `2.`, `3.`, `4.`);
5909	let r = _mm256_cvtsd_f64(a);
5910	assert_eq!(r, `1.`);
5911	}
5912
5913	#[simd_test(enable = "avx2")]
5914	unsafe fn test_mm256_cvtsi256_si32() {
5915	let a = _mm256_setr_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
5916	let r = _mm256_cvtsi256_si32(a);
5917	assert_eq!(r, `1`);
5918	}
5919	}
5920

Provided by KDAB

Definitions