use crate::{
    arch::asm,
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    mem::transmute,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi16)
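///
/// # Example
///
/// A minimal illustrative sketch (not compiled as a doctest; this intrinsic
/// is unstable and the mask value below is a hypothetical choice):
///
/// ```ignore
/// // Requires runtime support for AVX512-VBMI2 and AVX512-BW.
/// unsafe {
///     let src = _mm512_set1_epi16(-1);
///     let mem = [7i16; 32];
///     // Bits 0 and 2 of the mask are set, so mem[0] and mem[1] are expanded
///     // into lanes 0 and 2; every other lane keeps the value from `src`.
///     let r = _mm512_mask_expandloadu_epi16(src, 0b0101, mem.as_ptr());
/// }
/// ```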
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_expandloadu_epi16(
    src: __m512i,
    k: __mmask32,
    mem_addr: *const i16,
) -> __m512i {
    let mut dst: __m512i = src;
    asm!(
        vpl!("vpexpandw {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(zmm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_expandloadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
    let mut dst: __m512i;
    asm!(
        vpl!("vpexpandw {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(zmm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512f,avx512vbmi2,avx512vl,avx")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_expandloadu_epi16(
    src: __m256i,
    k: __mmask16,
    mem_addr: *const i16,
) -> __m256i {
    let mut dst: __m256i = src;
    asm!(
        vpl!("vpexpandw {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(ymm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512f,avx512vbmi2,avx512vl,avx")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_expandloadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
    let mut dst: __m256i;
    asm!(
        vpl!("vpexpandw {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(ymm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512f,avx512vbmi2,avx512vl,avx,sse")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_expandloadu_epi16(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i16,
) -> __m128i {
    let mut dst: __m128i = src;
    asm!(
        vpl!("vpexpandw {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(xmm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512f,avx512vbmi2,avx512vl,avx,sse")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_expandloadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
    let mut dst: __m128i;
    asm!(
        vpl!("vpexpandw {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(xmm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi8)
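///
/// # Example
///
/// A brief sketch (not compiled as a doctest; the mask is an illustrative
/// assumption):
///
/// ```ignore
/// unsafe {
///     let src = _mm512_setzero_si512();
///     let mem = [7i8; 64];
///     // Mask bits 1, 2 and 4 are set, so mem[0], mem[1] and mem[2] land in
///     // byte lanes 1, 2 and 4; all other lanes keep `src` (zero here).
///     let r = _mm512_mask_expandloadu_epi8(src, 0b10110, mem.as_ptr());
/// }
/// ```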
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_expandloadu_epi8(
    src: __m512i,
    k: __mmask64,
    mem_addr: *const i8,
) -> __m512i {
    let mut dst: __m512i = src;
    asm!(
        vpl!("vpexpandb {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(zmm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_expandloadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
    let mut dst: __m512i;
    asm!(
        vpl!("vpexpandb {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(zmm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2,avx512vl,avx")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_expandloadu_epi8(
    src: __m256i,
    k: __mmask32,
    mem_addr: *const i8,
) -> __m256i {
    let mut dst: __m256i = src;
    asm!(
        vpl!("vpexpandb {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(ymm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2,avx512vl,avx")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_expandloadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
    let mut dst: __m256i;
    asm!(
        vpl!("vpexpandb {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(ymm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512f,avx512vbmi2,avx512vl,avx,sse")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_expandloadu_epi8(
    src: __m128i,
    k: __mmask16,
    mem_addr: *const i8,
) -> __m128i {
    let mut dst: __m128i = src;
    asm!(
        vpl!("vpexpandb {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(xmm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512f,avx512vbmi2,avx512vl,avx,sse")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_expandloadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
    let mut dst: __m128i;
    asm!(
        vpl!("vpexpandb {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(xmm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi16)
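///
/// # Example
///
/// A minimal sketch (not compiled as a doctest; the mask and output buffer
/// are illustrative assumptions):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi16(9);
///     let mut out = [0i16; 32];
///     // Three mask bits are set, so exactly three 16-bit values are written
///     // contiguously to the start of `out`; the rest of `out` is untouched.
///     _mm512_mask_compressstoreu_epi16(out.as_mut_ptr() as *mut u8, 0b1011, a);
/// }
/// ```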
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm512_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask32, a: __m512i) {
    vcompressstorew(base_addr as *mut _, a.as_i16x32(), k)
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm256_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask16, a: __m256i) {
    vcompressstorew256(base_addr as *mut _, a.as_i16x16(), k)
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask8, a: __m128i) {
    vcompressstorew128(base_addr as *mut _, a.as_i16x8(), k)
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi8)
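///
/// # Example
///
/// A brief sketch (not compiled as a doctest; mask chosen for illustration):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi8(3);
///     let mut out = [0i8; 64];
///     // Two mask bits are set, so exactly two bytes are stored contiguously
///     // at the start of `out`.
///     _mm512_mask_compressstoreu_epi8(out.as_mut_ptr() as *mut u8, 0b101, a);
/// }
/// ```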
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm512_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask64, a: __m512i) {
    vcompressstoreb(base_addr as *mut _, a.as_i8x64(), k)
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm256_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask32, a: __m256i) {
    vcompressstoreb256(base_addr as *mut _, a.as_i8x32(), k)
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask16, a: __m128i) {
    vcompressstoreb128(base_addr as *mut _, a.as_i8x16(), k)
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi16&expand=1192)
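///
/// # Example
///
/// A minimal sketch (not compiled as a doctest; mask and values are
/// illustrative assumptions):
///
/// ```ignore
/// unsafe {
///     let src = _mm512_set1_epi16(-1);
///     let a = _mm512_set1_epi16(7);
///     // The two set mask bits select a[1] and a[3], which are packed into
///     // lanes 0 and 1 of the result; all remaining lanes come from `src`.
///     let r = _mm512_mask_compress_epi16(src, 0b1010, a);
/// }
/// ```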
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm512_mask_compress_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    transmute(vpcompressw(a.as_i16x32(), src.as_i16x32(), k))
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi16&expand=1193)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm512_maskz_compress_epi16(k: __mmask32, a: __m512i) -> __m512i {
    transmute(vpcompressw(
        a.as_i16x32(),
        _mm512_setzero_si512().as_i16x32(),
        k,
    ))
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi16&expand=1190)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm256_mask_compress_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    transmute(vpcompressw256(a.as_i16x16(), src.as_i16x16(), k))
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi16&expand=1191)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm256_maskz_compress_epi16(k: __mmask16, a: __m256i) -> __m256i {
    transmute(vpcompressw256(
        a.as_i16x16(),
        _mm256_setzero_si256().as_i16x16(),
        k,
    ))
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi16&expand=1188)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm_mask_compress_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpcompressw128(a.as_i16x8(), src.as_i16x8(), k))
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi16&expand=1189)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm_maskz_compress_epi16(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpcompressw128(
        a.as_i16x8(),
        _mm_setzero_si128().as_i16x8(),
        k,
    ))
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi8&expand=1210)
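///
/// # Example
///
/// A brief sketch (not compiled as a doctest; mask and values are
/// illustrative assumptions):
///
/// ```ignore
/// unsafe {
///     let src = _mm512_setzero_si512();
///     let a = _mm512_set1_epi8(5);
///     // Four mask bits are set, so four bytes of `a` are packed into the low
///     // four byte lanes; every other lane comes from `src` (zero here).
///     let r = _mm512_mask_compress_epi8(src, 0b1111, a);
/// }
/// ```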
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm512_mask_compress_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    transmute(vpcompressb(a.as_i8x64(), src.as_i8x64(), k))
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi8&expand=1211)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm512_maskz_compress_epi8(k: __mmask64, a: __m512i) -> __m512i {
    transmute(vpcompressb(
        a.as_i8x64(),
        _mm512_setzero_si512().as_i8x64(),
        k,
    ))
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi8&expand=1208)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm256_mask_compress_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    transmute(vpcompressb256(a.as_i8x32(), src.as_i8x32(), k))
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi8&expand=1209)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm256_maskz_compress_epi8(k: __mmask32, a: __m256i) -> __m256i {
    transmute(vpcompressb256(
        a.as_i8x32(),
        _mm256_setzero_si256().as_i8x32(),
        k,
    ))
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi8&expand=1206)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm_mask_compress_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    transmute(vpcompressb128(a.as_i8x16(), src.as_i8x16(), k))
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi8&expand=1207)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm_maskz_compress_epi8(k: __mmask16, a: __m128i) -> __m128i {
    transmute(vpcompressb128(
        a.as_i8x16(),
        _mm_setzero_si128().as_i8x16(),
        k,
    ))
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi16&expand=2310)
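///
/// # Example
///
/// A minimal sketch of the register-to-register expand (not compiled as a
/// doctest; mask and values are illustrative assumptions):
///
/// ```ignore
/// unsafe {
///     let src = _mm512_set1_epi16(-1);
///     let a = _mm512_set1_epi16(3);
///     // Lanes 0 and 2 of the result receive a[0] and a[1]; every other lane
///     // keeps the value from `src`.
///     let r = _mm512_mask_expand_epi16(src, 0b0101, a);
/// }
/// ```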
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub unsafe fn _mm512_mask_expand_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    transmute(vpexpandw(a.as_i16x32(), src.as_i16x32(), k))
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi16&expand=2311)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub unsafe fn _mm512_maskz_expand_epi16(k: __mmask32, a: __m512i) -> __m512i {
    transmute(vpexpandw(
        a.as_i16x32(),
        _mm512_setzero_si512().as_i16x32(),
        k,
    ))
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi16&expand=2308)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub unsafe fn _mm256_mask_expand_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    transmute(vpexpandw256(a.as_i16x16(), src.as_i16x16(), k))
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi16&expand=2309)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub unsafe fn _mm256_maskz_expand_epi16(k: __mmask16, a: __m256i) -> __m256i {
    transmute(vpexpandw256(
        a.as_i16x16(),
        _mm256_setzero_si256().as_i16x16(),
        k,
    ))
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi16&expand=2306)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub unsafe fn _mm_mask_expand_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpexpandw128(a.as_i16x8(), src.as_i16x8(), k))
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi16&expand=2307)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub unsafe fn _mm_maskz_expand_epi16(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpexpandw128(
        a.as_i16x8(),
        _mm_setzero_si128().as_i16x8(),
        k,
    ))
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi8&expand=2328)
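///
/// # Example
///
/// A brief sketch (not compiled as a doctest; mask and values are
/// illustrative assumptions):
///
/// ```ignore
/// unsafe {
///     let src = _mm512_setzero_si512();
///     let a = _mm512_set1_epi8(1);
///     // Byte lanes 0 and 1 receive a[0] and a[1]; all remaining lanes keep
///     // the value from `src` (zero here).
///     let r = _mm512_mask_expand_epi8(src, 0b11, a);
/// }
/// ```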
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub unsafe fn _mm512_mask_expand_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    transmute(vpexpandb(a.as_i8x64(), src.as_i8x64(), k))
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi8&expand=2329)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub unsafe fn _mm512_maskz_expand_epi8(k: __mmask64, a: __m512i) -> __m512i {
    transmute(vpexpandb(
        a.as_i8x64(),
        _mm512_setzero_si512().as_i8x64(),
        k,
    ))
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi8&expand=2326)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub unsafe fn _mm256_mask_expand_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    transmute(vpexpandb256(a.as_i8x32(), src.as_i8x32(), k))
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi8&expand=2327)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub unsafe fn _mm256_maskz_expand_epi8(k: __mmask32, a: __m256i) -> __m256i {
    transmute(vpexpandb256(
        a.as_i8x32(),
        _mm256_setzero_si256().as_i8x32(),
        k,
    ))
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi8&expand=2324)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub unsafe fn _mm_mask_expand_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    transmute(vpexpandb128(a.as_i8x16(), src.as_i8x16(), k))
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi8&expand=2325)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub unsafe fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i {
    transmute(vpexpandb128(
        a.as_i8x16(),
        _mm_setzero_si128().as_i8x16(),
        k,
    ))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi64&expand=5087)
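///
/// # Example
///
/// A small sketch of the funnel shift (not compiled as a doctest; the values
/// are illustrative assumptions):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi64(1);
///     let b = _mm512_set1_epi64(i64::MIN); // only the top bit set
///     let c = _mm512_set1_epi64(1);
///     // Per lane: a is shifted left by 1 and the top bit of b is shifted in
///     // at bit 0, so every lane of the result is (1 << 1) | 1 = 3.
///     let r = _mm512_shldv_epi64(a, b, c);
/// }
/// ```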
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    transmute(vpshldvq(a.as_i64x8(), b.as_i64x8(), c.as_i64x8()))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi64&expand=5085)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm512_mask_shldv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
    let shf: i64x8 = _mm512_shldv_epi64(a, b, c).as_i64x8();
    transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi64&expand=5086)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    let shf: i64x8 = _mm512_shldv_epi64(a, b, c).as_i64x8();
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi64&expand=5084)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    transmute(vpshldvq256(a.as_i64x4(), b.as_i64x4(), c.as_i64x4()))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi64&expand=5082)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm256_mask_shldv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    let shf: i64x4 = _mm256_shldv_epi64(a, b, c).as_i64x4();
    transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi64&expand=5083)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    let shf: i64x4 = _mm256_shldv_epi64(a, b, c).as_i64x4();
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi64&expand=5081)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    transmute(vpshldvq128(a.as_i64x2(), b.as_i64x2(), c.as_i64x2()))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi64&expand=5079)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm_mask_shldv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    let shf: i64x2 = _mm_shldv_epi64(a, b, c).as_i64x2();
    transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi64&expand=5080)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    let shf: i64x2 = _mm_shldv_epi64(a, b, c).as_i64x2();
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi32&expand=5078)
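///
/// # Example
///
/// A brief sketch (not compiled as a doctest; values are illustrative
/// assumptions). Passing the same vector for a and b turns the double shift
/// into a per-lane rotate left:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi32(0x8000_0001u32 as i32);
///     let c = _mm512_set1_epi32(4);
///     // Rotate each 32-bit lane left by 4: 0x8000_0001 becomes 0x0000_0018.
///     let r = _mm512_shldv_epi32(a, a, c);
/// }
/// ```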
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    transmute(vpshldvd(a.as_i32x16(), b.as_i32x16(), c.as_i32x16()))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi32&expand=5076)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm512_mask_shldv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
    let shf: i32x16 = _mm512_shldv_epi32(a, b, c).as_i32x16();
    transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi32&expand=5077)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm512_maskz_shldv_epi32(
    k: __mmask16,
    a: __m512i,
    b: __m512i,
    c: __m512i,
) -> __m512i {
    let shf: i32x16 = _mm512_shldv_epi32(a, b, c).as_i32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi32&expand=5075)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    transmute(vpshldvd256(a.as_i32x8(), b.as_i32x8(), c.as_i32x8()))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi32&expand=5073)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm256_mask_shldv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    let shf: i32x8 = _mm256_shldv_epi32(a, b, c).as_i32x8();
    transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi32&expand=5074)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    let shf: i32x8 = _mm256_shldv_epi32(a, b, c).as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi32&expand=5072)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    transmute(vpshldvd128(a.as_i32x4(), b.as_i32x4(), c.as_i32x4()))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi32&expand=5070)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm_mask_shldv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    let shf: i32x4 = _mm_shldv_epi32(a, b, c).as_i32x4();
    transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi32&expand=5071)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    let shf: i32x4 = _mm_shldv_epi32(a, b, c).as_i32x4();
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi16&expand=5069)
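///
/// # Example
///
/// A brief sketch (not compiled as a doctest; values are illustrative
/// assumptions):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi16(0x00F0);
///     let b = _mm512_setzero_si512();
///     let c = _mm512_set1_epi16(8);
///     // Each 16-bit lane becomes 0x00F0 shifted left by 8, i.e. the bit
///     // pattern 0xF000; `b` contributes only zero bits here.
///     let r = _mm512_shldv_epi16(a, b, c);
/// }
/// ```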
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    transmute(vpshldvw(a.as_i16x32(), b.as_i16x32(), c.as_i16x32()))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi16&expand=5067)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm512_mask_shldv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
    let shf: i16x32 = _mm512_shldv_epi16(a, b, c).as_i16x32();
    transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi16&expand=5068)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm512_maskz_shldv_epi16(
    k: __mmask32,
    a: __m512i,
    b: __m512i,
    c: __m512i,
) -> __m512i {
    let shf: i16x32 = _mm512_shldv_epi16(a, b, c).as_i16x32();
    let zero: i16x32 = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi16&expand=5066)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    transmute(vpshldvw256(a.as_i16x16(), b.as_i16x16(), c.as_i16x16()))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi16&expand=5064)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm256_mask_shldv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
    let shf: i16x16 = _mm256_shldv_epi16(a, b, c).as_i16x16();
    transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi16&expand=5065)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm256_maskz_shldv_epi16(
    k: __mmask16,
    a: __m256i,
    b: __m256i,
    c: __m256i,
) -> __m256i {
    let shf: i16x16 = _mm256_shldv_epi16(a, b, c).as_i16x16();
    let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi16&expand=5063)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    transmute(vpshldvw128(a.as_i16x8(), b.as_i16x8(), c.as_i16x8()))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi16&expand=5061)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm_mask_shldv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    let shf: i16x8 = _mm_shldv_epi16(a, b, c).as_i16x8();
    transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi16&expand=5062)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    let shf: i16x8 = _mm_shldv_epi16(a, b, c).as_i16x8();
    let zero: i16x8 = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi64&expand=5141)
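///
/// # Example
///
/// A brief sketch of the right funnel shift (not compiled as a doctest;
/// values are illustrative assumptions):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_setzero_si512();
///     let b = _mm512_set1_epi64(1);
///     let c = _mm512_set1_epi64(1);
///     // Per lane: the low bit of b is shifted down into the top bit of the
///     // result, so every lane has the bit pattern 0x8000_0000_0000_0000.
///     let r = _mm512_shrdv_epi64(a, b, c);
/// }
/// ```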
970#[inline]
971#[target_feature(enable = "avx512vbmi2")]
972#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
973#[cfg_attr(test, assert_instr(vpshrdvq))]
974pub unsafe fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
975 transmute(src:vpshrdvq(a:a.as_i64x8(), b:b.as_i64x8(), c:c.as_i64x8()))
976}
977
978/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
979///
980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi64&expand=5139)
981#[inline]
982#[target_feature(enable = "avx512vbmi2")]
983#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
984#[cfg_attr(test, assert_instr(vpshrdvq))]
985pub unsafe fn _mm512_mask_shrdv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
986 let shf: i64x8 = _mm512_shrdv_epi64(a, b, c).as_i64x8();
987 transmute(src:simd_select_bitmask(m:k, yes:shf, no:a.as_i64x8()))
988}
989
990/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
991///
992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi64&expand=5140)
993#[inline]
994#[target_feature(enable = "avx512vbmi2")]
995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
996#[cfg_attr(test, assert_instr(vpshrdvq))]
997pub unsafe fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
998 let shf: i64x8 = _mm512_shrdv_epi64(a, b, c).as_i64x8();
999 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
1000 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1001}
1002
1003/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
1004///
1005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi64&expand=5138)
1006#[inline]
1007#[target_feature(enable = "avx512vbmi2,avx512vl")]
1008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1009#[cfg_attr(test, assert_instr(vpshrdvq))]
1010pub unsafe fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    transmute(vpshrdvq256(a.as_i64x4(), b.as_i64x4(), c.as_i64x4()))
1012}
1013
1014/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1015///
1016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi64&expand=5136)
1017#[inline]
1018#[target_feature(enable = "avx512vbmi2,avx512vl")]
1019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1020#[cfg_attr(test, assert_instr(vpshrdvq))]
1021pub unsafe fn _mm256_mask_shrdv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
1022 let shf: i64x4 = _mm256_shrdv_epi64(a, b, c).as_i64x4();
    transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
1024}
1025
1026/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1027///
1028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi64&expand=5137)
1029#[inline]
1030#[target_feature(enable = "avx512vbmi2,avx512vl")]
1031#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1032#[cfg_attr(test, assert_instr(vpshrdvq))]
1033pub unsafe fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
1034 let shf: i64x4 = _mm256_shrdv_epi64(a, b, c).as_i64x4();
1035 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, shf, zero))
1037}
1038
1039/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
1040///
1041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi64&expand=5135)
1042#[inline]
1043#[target_feature(enable = "avx512vbmi2,avx512vl")]
1044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1045#[cfg_attr(test, assert_instr(vpshrdvq))]
1046pub unsafe fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    transmute(vpshrdvq128(a.as_i64x2(), b.as_i64x2(), c.as_i64x2()))
1048}
1049
1050/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1051///
1052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi64&expand=5133)
1053#[inline]
1054#[target_feature(enable = "avx512vbmi2,avx512vl")]
1055#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1056#[cfg_attr(test, assert_instr(vpshrdvq))]
1057pub unsafe fn _mm_mask_shrdv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
1058 let shf: i64x2 = _mm_shrdv_epi64(a, b, c).as_i64x2();
    transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
1060}
1061
1062/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1063///
1064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi64&expand=5134)
1065#[inline]
1066#[target_feature(enable = "avx512vbmi2,avx512vl")]
1067#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1068#[cfg_attr(test, assert_instr(vpshrdvq))]
1069pub unsafe fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1070 let shf: i64x2 = _mm_shrdv_epi64(a, b, c).as_i64x2();
1071 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, shf, zero))
1073}
1074
1075/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
1076///
1077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi32&expand=5132)
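///
/// A minimal usage sketch, assuming nightly `stdarch_x86_avx512` and
/// AVX512VBMI2 hardware (hence the `ignore` marker):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(0x0000_00FF);
///     let b = _mm512_set1_epi32(0x1111_1111);
///     let c = _mm512_set1_epi32(4);
///     // Each lane: (a >> 4) | (b << 28) = 0x1000_000F.
///     let r = _mm512_shrdv_epi32(a, b, c);
///     let expected = _mm512_set1_epi32(0x1000_000F);
///     assert_eq!(_mm512_cmpeq_epi32_mask(r, expected), 0xFFFF);
/// }
/// ```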
1078#[inline]
1079#[target_feature(enable = "avx512vbmi2")]
1080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1081#[cfg_attr(test, assert_instr(vpshrdvd))]
1082pub unsafe fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    transmute(vpshrdvd(a.as_i32x16(), b.as_i32x16(), c.as_i32x16()))
1084}
1085
1086/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1087///
1088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi32&expand=5130)
1089#[inline]
1090#[target_feature(enable = "avx512vbmi2")]
1091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1092#[cfg_attr(test, assert_instr(vpshrdvd))]
1093pub unsafe fn _mm512_mask_shrdv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
1094 let shf: i32x16 = _mm512_shrdv_epi32(a, b, c).as_i32x16();
    transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
1096}
1097
1098/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1099///
1100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi32&expand=5131)
1101#[inline]
1102#[target_feature(enable = "avx512vbmi2")]
1103#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1104#[cfg_attr(test, assert_instr(vpshrdvd))]
1105pub unsafe fn _mm512_maskz_shrdv_epi32(
1106 k: __mmask16,
1107 a: __m512i,
1108 b: __m512i,
1109 c: __m512i,
1110) -> __m512i {
1111 let shf: i32x16 = _mm512_shrdv_epi32(a, b, c).as_i32x16();
1112 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, shf, zero))
1114}
1115
1116/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
1117///
1118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi32&expand=5129)
1119#[inline]
1120#[target_feature(enable = "avx512vbmi2,avx512vl")]
1121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1122#[cfg_attr(test, assert_instr(vpshrdvd))]
1123pub unsafe fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    transmute(vpshrdvd256(a.as_i32x8(), b.as_i32x8(), c.as_i32x8()))
1125}
1126
1127/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1128///
1129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi32&expand=5127)
1130#[inline]
1131#[target_feature(enable = "avx512vbmi2,avx512vl")]
1132#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1133#[cfg_attr(test, assert_instr(vpshrdvd))]
1134pub unsafe fn _mm256_mask_shrdv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
1135 let shf: i32x8 = _mm256_shrdv_epi32(a, b, c).as_i32x8();
    transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
1137}
1138
1139/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1140///
1141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi32&expand=5128)
1142#[inline]
1143#[target_feature(enable = "avx512vbmi2,avx512vl")]
1144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1145#[cfg_attr(test, assert_instr(vpshrdvd))]
1146pub unsafe fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
1147 let shf: i32x8 = _mm256_shrdv_epi32(a, b, c).as_i32x8();
1148 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, shf, zero))
1150}
1151
1152/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
1153///
1154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi32&expand=5126)
1155#[inline]
1156#[target_feature(enable = "avx512vbmi2,avx512vl")]
1157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1158#[cfg_attr(test, assert_instr(vpshrdvd))]
1159pub unsafe fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    transmute(vpshrdvd128(a.as_i32x4(), b.as_i32x4(), c.as_i32x4()))
1161}
1162
1163/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1164///
1165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi32&expand=5124)
1166#[inline]
1167#[target_feature(enable = "avx512vbmi2,avx512vl")]
1168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1169#[cfg_attr(test, assert_instr(vpshrdvd))]
1170pub unsafe fn _mm_mask_shrdv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
1171 let shf: i32x4 = _mm_shrdv_epi32(a, b, c).as_i32x4();
    transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
1173}
1174
1175/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1176///
1177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi32&expand=5125)
1178#[inline]
1179#[target_feature(enable = "avx512vbmi2,avx512vl")]
1180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1181#[cfg_attr(test, assert_instr(vpshrdvd))]
1182pub unsafe fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1183 let shf: i32x4 = _mm_shrdv_epi32(a, b, c).as_i32x4();
1184 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, shf, zero))
1186}
1187
1188/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
1189///
1190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi16&expand=5123)
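///
/// A minimal usage sketch, assuming nightly `stdarch_x86_avx512`,
/// AVX512VBMI2 hardware, and AVX512BW for the 16-bit comparison (hence the
/// `ignore` marker):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(0x00FF);
///     let b = _mm512_set1_epi16(0x1111);
///     let c = _mm512_set1_epi16(4);
///     // Each lane: (a >> 4) | (b << 12) = 0x100F.
///     let r = _mm512_shrdv_epi16(a, b, c);
///     let expected = _mm512_set1_epi16(0x100F);
///     assert_eq!(_mm512_cmpeq_epi16_mask(r, expected), 0xFFFF_FFFF);
/// }
/// ```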
1191#[inline]
1192#[target_feature(enable = "avx512vbmi2")]
1193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1194#[cfg_attr(test, assert_instr(vpshrdvw))]
1195pub unsafe fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    transmute(vpshrdvw(a.as_i16x32(), b.as_i16x32(), c.as_i16x32()))
1197}
1198
1199/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1200///
1201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi16&expand=5121)
1202#[inline]
1203#[target_feature(enable = "avx512vbmi2")]
1204#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1205#[cfg_attr(test, assert_instr(vpshrdvw))]
1206pub unsafe fn _mm512_mask_shrdv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
1207 let shf: i16x32 = _mm512_shrdv_epi16(a, b, c).as_i16x32();
    transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
1209}
1210
1211/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1212///
1213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi16&expand=5122)
1214#[inline]
1215#[target_feature(enable = "avx512vbmi2")]
1216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1217#[cfg_attr(test, assert_instr(vpshrdvw))]
1218pub unsafe fn _mm512_maskz_shrdv_epi16(
1219 k: __mmask32,
1220 a: __m512i,
1221 b: __m512i,
1222 c: __m512i,
1223) -> __m512i {
1224 let shf: i16x32 = _mm512_shrdv_epi16(a, b, c).as_i16x32();
1225 let zero: i16x32 = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, shf, zero))
1227}
1228
1229/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
1230///
1231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi16&expand=5120)
1232#[inline]
1233#[target_feature(enable = "avx512vbmi2,avx512vl")]
1234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1235#[cfg_attr(test, assert_instr(vpshrdvw))]
1236pub unsafe fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    transmute(vpshrdvw256(a.as_i16x16(), b.as_i16x16(), c.as_i16x16()))
1238}
1239
1240/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1241///
1242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi16&expand=5118)
1243#[inline]
1244#[target_feature(enable = "avx512vbmi2,avx512vl")]
1245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1246#[cfg_attr(test, assert_instr(vpshrdvw))]
1247pub unsafe fn _mm256_mask_shrdv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
1248 let shf: i16x16 = _mm256_shrdv_epi16(a, b, c).as_i16x16();
    transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
1250}
1251
1252/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1253///
1254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi16&expand=5119)
1255#[inline]
1256#[target_feature(enable = "avx512vbmi2,avx512vl")]
1257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1258#[cfg_attr(test, assert_instr(vpshrdvw))]
1259pub unsafe fn _mm256_maskz_shrdv_epi16(
1260 k: __mmask16,
1261 a: __m256i,
1262 b: __m256i,
1263 c: __m256i,
1264) -> __m256i {
1265 let shf: i16x16 = _mm256_shrdv_epi16(a, b, c).as_i16x16();
1266 let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, shf, zero))
1268}
1269
1270/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
1271///
1272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi16&expand=5117)
1273#[inline]
1274#[target_feature(enable = "avx512vbmi2,avx512vl")]
1275#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1276#[cfg_attr(test, assert_instr(vpshrdvw))]
1277pub unsafe fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    transmute(vpshrdvw128(a.as_i16x8(), b.as_i16x8(), c.as_i16x8()))
1279}
1280
1281/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1282///
1283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi16&expand=5115)
1284#[inline]
1285#[target_feature(enable = "avx512vbmi2,avx512vl")]
1286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1287#[cfg_attr(test, assert_instr(vpshrdvw))]
1288pub unsafe fn _mm_mask_shrdv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
1289 let shf: i16x8 = _mm_shrdv_epi16(a, b, c).as_i16x8();
    transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
1291}
1292
1293/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1294///
1295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi16&expand=5116)
1296#[inline]
1297#[target_feature(enable = "avx512vbmi2,avx512vl")]
1298#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1299#[cfg_attr(test, assert_instr(vpshrdvw))]
1300pub unsafe fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1301 let shf: i16x8 = _mm_shrdv_epi16(a, b, c).as_i16x8();
1302 let zero: i16x8 = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, shf, zero))
1304}
1305
/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
1307///
1308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi64&expand=5060)
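///
/// A minimal sketch of the immediate form, assuming nightly
/// `stdarch_x86_avx512` and AVX512VBMI2 hardware (hence the `ignore` marker):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi64(0x0000_0000_0000_00FF);
///     let b = _mm512_set1_epi64(0x1000_0000_0000_0000);
///     // Each lane: the upper 64 bits of (a:b) << 4,
///     // i.e. (a << 4) | (b >> 60) = 0xFF1.
///     let r = _mm512_shldi_epi64::<4>(a, b);
///     let expected = _mm512_set1_epi64(0xFF1);
///     assert_eq!(_mm512_cmpeq_epi64_mask(r, expected), 0xFF);
/// }
/// ```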
1309#[inline]
1310#[target_feature(enable = "avx512vbmi2")]
1311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1312#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1313#[rustc_legacy_const_generics(2)]
1314pub unsafe fn _mm512_shldi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1315 static_assert_uimm_bits!(IMM8, 8);
1316 let imm8: i64 = IMM8 as i64;
    transmute(vpshldvq(
        a.as_i64x8(),
        b.as_i64x8(),
        _mm512_set1_epi64(imm8).as_i64x8(),
    ))
1322}
1323
1324/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1325///
1326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi64&expand=5058)
1327#[inline]
1328#[target_feature(enable = "avx512vbmi2")]
1329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1330#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1331#[rustc_legacy_const_generics(4)]
1332pub unsafe fn _mm512_mask_shldi_epi64<const IMM8: i32>(
1333 src: __m512i,
1334 k: __mmask8,
1335 a: __m512i,
1336 b: __m512i,
1337) -> __m512i {
1338 static_assert_uimm_bits!(IMM8, 8);
1339 let imm8: i64 = IMM8 as i64;
    let shf: i64x8 = vpshldvq(
        a.as_i64x8(),
        b.as_i64x8(),
        _mm512_set1_epi64(imm8).as_i64x8(),
    );
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
1346}
1347
1348/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1349///
1350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi64&expand=5059)
1351#[inline]
1352#[target_feature(enable = "avx512vbmi2")]
1353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1354#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1355#[rustc_legacy_const_generics(3)]
1356pub unsafe fn _mm512_maskz_shldi_epi64<const IMM8: i32>(
1357 k: __mmask8,
1358 a: __m512i,
1359 b: __m512i,
1360) -> __m512i {
1361 static_assert_uimm_bits!(IMM8, 8);
1362 let imm8: i64 = IMM8 as i64;
    let shf: i64x8 = vpshldvq(
        a.as_i64x8(),
        b.as_i64x8(),
        _mm512_set1_epi64(imm8).as_i64x8(),
    );
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, shf, zero))
1370}
1371
/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
1373///
1374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi64&expand=5057)
1375#[inline]
1376#[target_feature(enable = "avx512vbmi2,avx512vl")]
1377#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1378#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1379#[rustc_legacy_const_generics(2)]
1380pub unsafe fn _mm256_shldi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1381 static_assert_uimm_bits!(IMM8, 8);
1382 let imm8: i64 = IMM8 as i64;
    transmute(vpshldvq256(
        a.as_i64x4(),
        b.as_i64x4(),
        _mm256_set1_epi64x(imm8).as_i64x4(),
    ))
1388}
1389
1390/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1391///
1392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi64&expand=5055)
1393#[inline]
1394#[target_feature(enable = "avx512vbmi2,avx512vl")]
1395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1396#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1397#[rustc_legacy_const_generics(4)]
1398pub unsafe fn _mm256_mask_shldi_epi64<const IMM8: i32>(
1399 src: __m256i,
1400 k: __mmask8,
1401 a: __m256i,
1402 b: __m256i,
1403) -> __m256i {
1404 static_assert_uimm_bits!(IMM8, 8);
1405 let imm8: i64 = IMM8 as i64;
    let shf: i64x4 = vpshldvq256(
        a.as_i64x4(),
        b.as_i64x4(),
        _mm256_set1_epi64x(imm8).as_i64x4(),
    );
    transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
1412}
1413
1414/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1415///
1416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi64&expand=5056)
1417#[inline]
1418#[target_feature(enable = "avx512vbmi2,avx512vl")]
1419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1420#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1421#[rustc_legacy_const_generics(3)]
1422pub unsafe fn _mm256_maskz_shldi_epi64<const IMM8: i32>(
1423 k: __mmask8,
1424 a: __m256i,
1425 b: __m256i,
1426) -> __m256i {
1427 static_assert_uimm_bits!(IMM8, 8);
1428 let imm8: i64 = IMM8 as i64;
    let shf: i64x4 = vpshldvq256(
        a.as_i64x4(),
        b.as_i64x4(),
        _mm256_set1_epi64x(imm8).as_i64x4(),
    );
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, shf, zero))
1436}
1437
/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
1439///
1440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi64&expand=5054)
1441#[inline]
1442#[target_feature(enable = "avx512vbmi2,avx512vl")]
1443#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1444#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1445#[rustc_legacy_const_generics(2)]
1446pub unsafe fn _mm_shldi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
1447 static_assert_uimm_bits!(IMM8, 8);
1448 let imm8: i64 = IMM8 as i64;
    transmute(vpshldvq128(
        a.as_i64x2(),
        b.as_i64x2(),
        _mm_set1_epi64x(imm8).as_i64x2(),
    ))
1454}
1455
1456/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1457///
1458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi64&expand=5052)
1459#[inline]
1460#[target_feature(enable = "avx512vbmi2,avx512vl")]
1461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1462#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1463#[rustc_legacy_const_generics(4)]
1464pub unsafe fn _mm_mask_shldi_epi64<const IMM8: i32>(
1465 src: __m128i,
1466 k: __mmask8,
1467 a: __m128i,
1468 b: __m128i,
1469) -> __m128i {
1470 static_assert_uimm_bits!(IMM8, 8);
1471 let imm8: i64 = IMM8 as i64;
    let shf: i64x2 = vpshldvq128(a.as_i64x2(), b.as_i64x2(), _mm_set1_epi64x(imm8).as_i64x2());
    transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
1474}
1475
1476/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1477///
1478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi64&expand=5053)
1479#[inline]
1480#[target_feature(enable = "avx512vbmi2,avx512vl")]
1481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1482#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1483#[rustc_legacy_const_generics(3)]
1484pub unsafe fn _mm_maskz_shldi_epi64<const IMM8: i32>(
1485 k: __mmask8,
1486 a: __m128i,
1487 b: __m128i,
1488) -> __m128i {
1489 static_assert_uimm_bits!(IMM8, 8);
1490 let imm8: i64 = IMM8 as i64;
    let shf: i64x2 = vpshldvq128(a.as_i64x2(), b.as_i64x2(), _mm_set1_epi64x(imm8).as_i64x2());
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, shf, zero))
1494}
1495
1496/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
1497///
1498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi32&expand=5051)
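///
/// A minimal sketch of the immediate form, assuming nightly
/// `stdarch_x86_avx512` and AVX512VBMI2 hardware (hence the `ignore` marker):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(0x0000_00FF);
///     let b = _mm512_set1_epi32(0x1000_0000);
///     // Each lane: (a << 4) | (b >> 28) = 0xFF1.
///     let r = _mm512_shldi_epi32::<4>(a, b);
///     let expected = _mm512_set1_epi32(0xFF1);
///     assert_eq!(_mm512_cmpeq_epi32_mask(r, expected), 0xFFFF);
/// }
/// ```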
1499#[inline]
1500#[target_feature(enable = "avx512vbmi2")]
1501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1502#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1503#[rustc_legacy_const_generics(2)]
1504pub unsafe fn _mm512_shldi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1505 static_assert_uimm_bits!(IMM8, 8);
    transmute(vpshldvd(
        a.as_i32x16(),
        b.as_i32x16(),
        _mm512_set1_epi32(IMM8).as_i32x16(),
    ))
1511}
1512
1513/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1514///
1515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi32&expand=5049)
1516#[inline]
1517#[target_feature(enable = "avx512vbmi2")]
1518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1519#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1520#[rustc_legacy_const_generics(4)]
1521pub unsafe fn _mm512_mask_shldi_epi32<const IMM8: i32>(
1522 src: __m512i,
1523 k: __mmask16,
1524 a: __m512i,
1525 b: __m512i,
1526) -> __m512i {
1527 static_assert_uimm_bits!(IMM8, 8);
    let shf: i32x16 = vpshldvd(
        a.as_i32x16(),
        b.as_i32x16(),
        _mm512_set1_epi32(IMM8).as_i32x16(),
    );
    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
1534}
1535
1536/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1537///
1538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi32&expand=5050)
1539#[inline]
1540#[target_feature(enable = "avx512vbmi2")]
1541#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1542#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1543#[rustc_legacy_const_generics(3)]
1544pub unsafe fn _mm512_maskz_shldi_epi32<const IMM8: i32>(
1545 k: __mmask16,
1546 a: __m512i,
1547 b: __m512i,
1548) -> __m512i {
1549 static_assert_uimm_bits!(IMM8, 8);
    let shf: i32x16 = vpshldvd(
        a.as_i32x16(),
        b.as_i32x16(),
        _mm512_set1_epi32(IMM8).as_i32x16(),
    );
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, shf, zero))
1557}
1558
1559/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
1560///
1561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi32&expand=5048)
1562#[inline]
1563#[target_feature(enable = "avx512vbmi2,avx512vl")]
1564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1565#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1566#[rustc_legacy_const_generics(2)]
1567pub unsafe fn _mm256_shldi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1568 static_assert_uimm_bits!(IMM8, 8);
    transmute(vpshldvd256(
        a.as_i32x8(),
        b.as_i32x8(),
        _mm256_set1_epi32(IMM8).as_i32x8(),
    ))
1574}
1575
1576/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1577///
1578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi32&expand=5046)
1579#[inline]
1580#[target_feature(enable = "avx512vbmi2,avx512vl")]
1581#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1582#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1583#[rustc_legacy_const_generics(4)]
1584pub unsafe fn _mm256_mask_shldi_epi32<const IMM8: i32>(
1585 src: __m256i,
1586 k: __mmask8,
1587 a: __m256i,
1588 b: __m256i,
1589) -> __m256i {
1590 static_assert_uimm_bits!(IMM8, 8);
    let shf: i32x8 = vpshldvd256(
        a.as_i32x8(),
        b.as_i32x8(),
        _mm256_set1_epi32(IMM8).as_i32x8(),
    );
    transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
1597}
1598
1599/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1600///
1601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi32&expand=5047)
1602#[inline]
1603#[target_feature(enable = "avx512vbmi2,avx512vl")]
1604#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1605#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1606#[rustc_legacy_const_generics(3)]
1607pub unsafe fn _mm256_maskz_shldi_epi32<const IMM8: i32>(
1608 k: __mmask8,
1609 a: __m256i,
1610 b: __m256i,
1611) -> __m256i {
1612 static_assert_uimm_bits!(IMM8, 8);
    let shf: i32x8 = vpshldvd256(
        a.as_i32x8(),
        b.as_i32x8(),
        _mm256_set1_epi32(IMM8).as_i32x8(),
    );
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, shf, zero))
1620}
1621
1622/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
1623///
1624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi32&expand=5045)
1625#[inline]
1626#[target_feature(enable = "avx512vbmi2,avx512vl")]
1627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1628#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1629#[rustc_legacy_const_generics(2)]
1630pub unsafe fn _mm_shldi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
1631 static_assert_uimm_bits!(IMM8, 8);
    transmute(vpshldvd128(
        a.as_i32x4(),
        b.as_i32x4(),
        _mm_set1_epi32(IMM8).as_i32x4(),
    ))
1637}
1638
1639/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1640///
1641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi32&expand=5043)
1642#[inline]
1643#[target_feature(enable = "avx512vbmi2,avx512vl")]
1644#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1645#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1646#[rustc_legacy_const_generics(4)]
1647pub unsafe fn _mm_mask_shldi_epi32<const IMM8: i32>(
1648 src: __m128i,
1649 k: __mmask8,
1650 a: __m128i,
1651 b: __m128i,
1652) -> __m128i {
1653 static_assert_uimm_bits!(IMM8, 8);
    let shf: i32x4 = vpshldvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(IMM8).as_i32x4());
    transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
1656}
1657
1658/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1659///
1660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi32&expand=5044)
1661#[inline]
1662#[target_feature(enable = "avx512vbmi2,avx512vl")]
1663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1664#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1665#[rustc_legacy_const_generics(3)]
1666pub unsafe fn _mm_maskz_shldi_epi32<const IMM8: i32>(
1667 k: __mmask8,
1668 a: __m128i,
1669 b: __m128i,
1670) -> __m128i {
1671 static_assert_uimm_bits!(IMM8, 8);
    let shf: i32x4 = vpshldvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(IMM8).as_i32x4());
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, shf, zero))
1675}
1676
/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1678///
1679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi16&expand=5042)
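///
/// A minimal sketch of the immediate form, assuming nightly
/// `stdarch_x86_avx512`, AVX512VBMI2 hardware, and AVX512BW for the 16-bit
/// comparison (hence the `ignore` marker):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(0x00FF);
///     let b = _mm512_set1_epi16(0x1000);
///     // Each lane: (a << 4) | (b >> 12) = 0xFF1.
///     let r = _mm512_shldi_epi16::<4>(a, b);
///     let expected = _mm512_set1_epi16(0xFF1);
///     assert_eq!(_mm512_cmpeq_epi16_mask(r, expected), 0xFFFF_FFFF);
/// }
/// ```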
1680#[inline]
1681#[target_feature(enable = "avx512vbmi2")]
1682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1683#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1684#[rustc_legacy_const_generics(2)]
1685pub unsafe fn _mm512_shldi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1686 static_assert_uimm_bits!(IMM8, 8);
1687 let imm8: i16 = IMM8 as i16;
    transmute(vpshldvw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_set1_epi16(imm8).as_i16x32(),
    ))
1693}
1694
1695/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1696///
1697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi16&expand=5040)
1698#[inline]
1699#[target_feature(enable = "avx512vbmi2")]
1700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1701#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1702#[rustc_legacy_const_generics(4)]
1703pub unsafe fn _mm512_mask_shldi_epi16<const IMM8: i32>(
1704 src: __m512i,
1705 k: __mmask32,
1706 a: __m512i,
1707 b: __m512i,
1708) -> __m512i {
1709 static_assert_uimm_bits!(IMM8, 8);
1710 let imm8: i16 = IMM8 as i16;
    let shf: i16x32 = vpshldvw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_set1_epi16(imm8).as_i16x32(),
    );
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
1717}
1718
1719/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1720///
1721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi16&expand=5041)
1722#[inline]
1723#[target_feature(enable = "avx512vbmi2")]
1724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1725#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1726#[rustc_legacy_const_generics(3)]
1727pub unsafe fn _mm512_maskz_shldi_epi16<const IMM8: i32>(
1728 k: __mmask32,
1729 a: __m512i,
1730 b: __m512i,
1731) -> __m512i {
1732 static_assert_uimm_bits!(IMM8, 8);
1733 let imm8: i16 = IMM8 as i16;
    let shf: i16x32 = vpshldvw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_set1_epi16(imm8).as_i16x32(),
    );
    let zero: i16x32 = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, shf, zero))
1741}
1742
/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1744///
1745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi16&expand=5039)
1746#[inline]
1747#[target_feature(enable = "avx512vbmi2,avx512vl")]
1748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1749#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1750#[rustc_legacy_const_generics(2)]
1751pub unsafe fn _mm256_shldi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1752 static_assert_uimm_bits!(IMM8, 8);
1753 let imm8: i16 = IMM8 as i16;
    transmute(vpshldvw256(
        a.as_i16x16(),
        b.as_i16x16(),
        _mm256_set1_epi16(imm8).as_i16x16(),
    ))
1759}
1760
1761/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1762///
1763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi16&expand=5037)
1764#[inline]
1765#[target_feature(enable = "avx512vbmi2,avx512vl")]
1766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1767#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1768#[rustc_legacy_const_generics(4)]
1769pub unsafe fn _mm256_mask_shldi_epi16<const IMM8: i32>(
1770 src: __m256i,
1771 k: __mmask16,
1772 a: __m256i,
1773 b: __m256i,
1774) -> __m256i {
1775 static_assert_uimm_bits!(IMM8, 8);
1776 let imm8: i16 = IMM8 as i16;
    let shf: i16x16 = vpshldvw256(
        a.as_i16x16(),
        b.as_i16x16(),
        _mm256_set1_epi16(imm8).as_i16x16(),
    );
    transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
1783}
1784
1785/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1786///
1787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi16&expand=5038)
1788#[inline]
1789#[target_feature(enable = "avx512vbmi2,avx512vl")]
1790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1791#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1792#[rustc_legacy_const_generics(3)]
1793pub unsafe fn _mm256_maskz_shldi_epi16<const IMM8: i32>(
1794 k: __mmask16,
1795 a: __m256i,
1796 b: __m256i,
1797) -> __m256i {
1798 static_assert_uimm_bits!(IMM8, 8);
1799 let imm8: i16 = IMM8 as i16;
    let shf: i16x16 = vpshldvw256(
        a.as_i16x16(),
        b.as_i16x16(),
        _mm256_set1_epi16(imm8).as_i16x16(),
    );
    let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, shf, zero))
1807}
1808
/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1810///
1811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi16&expand=5036)
1812#[inline]
1813#[target_feature(enable = "avx512vbmi2,avx512vl")]
1814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1815#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1816#[rustc_legacy_const_generics(2)]
1817pub unsafe fn _mm_shldi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
1818 static_assert_uimm_bits!(IMM8, 8);
1819 let imm8: i16 = IMM8 as i16;
    transmute(vpshldvw128(
        a.as_i16x8(),
        b.as_i16x8(),
        _mm_set1_epi16(imm8).as_i16x8(),
    ))
1825}
1826
1827/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1828///
1829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi16&expand=5034)
1830#[inline]
1831#[target_feature(enable = "avx512vbmi2,avx512vl")]
1832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1833#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1834#[rustc_legacy_const_generics(4)]
1835pub unsafe fn _mm_mask_shldi_epi16<const IMM8: i32>(
1836 src: __m128i,
1837 k: __mmask8,
1838 a: __m128i,
1839 b: __m128i,
1840) -> __m128i {
1841 static_assert_uimm_bits!(IMM8, 8);
1842 let imm8: i16 = IMM8 as i16;
    let shf: i16x8 = vpshldvw128(a.as_i16x8(), b.as_i16x8(), _mm_set1_epi16(imm8).as_i16x8());
    transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
1845}
1846
1847/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1848///
1849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi16&expand=5035)
1850#[inline]
1851#[target_feature(enable = "avx512vbmi2,avx512vl")]
1852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1853#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1854#[rustc_legacy_const_generics(3)]
1855pub unsafe fn _mm_maskz_shldi_epi16<const IMM8: i32>(
1856 k: __mmask8,
1857 a: __m128i,
1858 b: __m128i,
1859) -> __m128i {
1860 static_assert_uimm_bits!(IMM8, 8);
1861 let imm8: i16 = IMM8 as i16;
    let shf: i16x8 = vpshldvw128(a.as_i16x8(), b.as_i16x8(), _mm_set1_epi16(imm8).as_i16x8());
    let zero: i16x8 = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, shf, zero))
1865}
1866
1867/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
1868///
1869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi64&expand=5114)
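///
/// A minimal sketch of the immediate right-shift form, assuming nightly
/// `stdarch_x86_avx512` and AVX512VBMI2 hardware (hence the `ignore` marker):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi64(0x0000_0000_0000_00FF);
///     let b = _mm512_set1_epi64(0x1111_1111_1111_1111);
///     // Each lane: the lower 64 bits of (b:a) >> 4,
///     // i.e. (a >> 4) | (b << 60) = 0x1000_0000_0000_000F.
///     let r = _mm512_shrdi_epi64::<4>(a, b);
///     let expected = _mm512_set1_epi64(0x1000_0000_0000_000F);
///     assert_eq!(_mm512_cmpeq_epi64_mask(r, expected), 0xFF);
/// }
/// ```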
1870#[inline]
1871#[target_feature(enable = "avx512vbmi2")]
1872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1873#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1874#[rustc_legacy_const_generics(2)]
1875pub unsafe fn _mm512_shrdi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1876 static_assert_uimm_bits!(IMM8, 8);
1877 let imm8: i64 = IMM8 as i64;
    transmute(vpshrdvq(
        a.as_i64x8(),
        b.as_i64x8(),
        _mm512_set1_epi64(imm8).as_i64x8(),
    ))
1883}
1884
/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1886///
1887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi64&expand=5112)
1888#[inline]
1889#[target_feature(enable = "avx512vbmi2")]
1890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1891#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1892#[rustc_legacy_const_generics(4)]
1893pub unsafe fn _mm512_mask_shrdi_epi64<const IMM8: i32>(
1894 src: __m512i,
1895 k: __mmask8,
1896 a: __m512i,
1897 b: __m512i,
1898) -> __m512i {
1899 static_assert_uimm_bits!(IMM8, 8);
1900 let imm8: i64 = IMM8 as i64;
    let shf: i64x8 = vpshrdvq(
        a.as_i64x8(),
        b.as_i64x8(),
        _mm512_set1_epi64(imm8).as_i64x8(),
    );
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
1907}
1908
1909/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1910///
1911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi64&expand=5113)
1912#[inline]
1913#[target_feature(enable = "avx512vbmi2")]
1914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1915#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 255))] //should be vpshrdq
1916#[rustc_legacy_const_generics(3)]
1917pub unsafe fn _mm512_maskz_shrdi_epi64<const IMM8: i32>(
1918 k: __mmask8,
1919 a: __m512i,
1920 b: __m512i,
1921) -> __m512i {
1922 static_assert_uimm_bits!(IMM8, 8);
1923 let imm8: i64 = IMM8 as i64;
    let shf: i64x8 = vpshrdvq(
        a.as_i64x8(),
        b.as_i64x8(),
        _mm512_set1_epi64(imm8).as_i64x8(),
    );
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, shf, zero))
1931}
1932
1933/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
1934///
1935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi64&expand=5111)
1936#[inline]
1937#[target_feature(enable = "avx512vbmi2,avx512vl")]
1938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1939#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1940#[rustc_legacy_const_generics(2)]
1941pub unsafe fn _mm256_shrdi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1942 static_assert_uimm_bits!(IMM8, 8);
1943 let imm8: i64 = IMM8 as i64;
    transmute(vpshrdvq256(
        a.as_i64x4(),
        b.as_i64x4(),
        _mm256_set1_epi64x(imm8).as_i64x4(),
    ))
1949}
1950
/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1952///
1953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi64&expand=5109)
1954#[inline]
1955#[target_feature(enable = "avx512vbmi2,avx512vl")]
1956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1957#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1958#[rustc_legacy_const_generics(4)]
1959pub unsafe fn _mm256_mask_shrdi_epi64<const IMM8: i32>(
1960 src: __m256i,
1961 k: __mmask8,
1962 a: __m256i,
1963 b: __m256i,
1964) -> __m256i {
1965 static_assert_uimm_bits!(IMM8, 8);
1966 let imm8: i64 = IMM8 as i64;
    let shf: i64x4 = vpshrdvq256(
        a.as_i64x4(),
        b.as_i64x4(),
        _mm256_set1_epi64x(imm8).as_i64x4(),
    );
    transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
1973}
1974
1975/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1976///
1977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi64&expand=5110)
1978#[inline]
1979#[target_feature(enable = "avx512vbmi2,avx512vl")]
1980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1981#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1982#[rustc_legacy_const_generics(3)]
1983pub unsafe fn _mm256_maskz_shrdi_epi64<const IMM8: i32>(
1984 k: __mmask8,
1985 a: __m256i,
1986 b: __m256i,
1987) -> __m256i {
1988 static_assert_uimm_bits!(IMM8, 8);
1989 let imm8: i64 = IMM8 as i64;
    let shf: i64x4 = vpshrdvq256(
        a.as_i64x4(),
        b.as_i64x4(),
        _mm256_set1_epi64x(imm8).as_i64x4(),
    );
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, shf, zero))
1997}
1998
1999/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
2000///
2001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi64&expand=5108)
2002#[inline]
2003#[target_feature(enable = "avx512vbmi2,avx512vl")]
2004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2005#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
2006#[rustc_legacy_const_generics(2)]
2007pub unsafe fn _mm_shrdi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
2008 static_assert_uimm_bits!(IMM8, 8);
2009 let imm8: i64 = IMM8 as i64;
    transmute(vpshrdvq128(
        a.as_i64x2(),
        b.as_i64x2(),
        _mm_set1_epi64x(imm8).as_i64x2(),
    ))
2015}
2016
/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2018///
2019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi64&expand=5106)
2020#[inline]
2021#[target_feature(enable = "avx512vbmi2,avx512vl")]
2022#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2023#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
2024#[rustc_legacy_const_generics(4)]
2025pub unsafe fn _mm_mask_shrdi_epi64<const IMM8: i32>(
2026 src: __m128i,
2027 k: __mmask8,
2028 a: __m128i,
2029 b: __m128i,
2030) -> __m128i {
2031 static_assert_uimm_bits!(IMM8, 8);
2032 let imm8: i64 = IMM8 as i64;
    let shf: i64x2 = vpshrdvq128(a.as_i64x2(), b.as_i64x2(), _mm_set1_epi64x(imm8).as_i64x2());
    transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
2035}
2036
2037/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2038///
2039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi64&expand=5107)
2040#[inline]
2041#[target_feature(enable = "avx512vbmi2,avx512vl")]
2042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2043#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
2044#[rustc_legacy_const_generics(3)]
2045pub unsafe fn _mm_maskz_shrdi_epi64<const IMM8: i32>(
2046 k: __mmask8,
2047 a: __m128i,
2048 b: __m128i,
2049) -> __m128i {
2050 static_assert_uimm_bits!(IMM8, 8);
2051 let imm8: i64 = IMM8 as i64;
    let shf: i64x2 = vpshrdvq128(a.as_i64x2(), b.as_i64x2(), _mm_set1_epi64x(imm8).as_i64x2());
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, shf, zero))
2055}
2056
2057/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
2058///
2059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi32&expand=5105)
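///
/// A minimal sketch of the immediate right-shift form, assuming nightly
/// `stdarch_x86_avx512` and AVX512VBMI2 hardware (hence the `ignore` marker):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(0x0000_00FF);
///     let b = _mm512_set1_epi32(0x1111_1111);
///     // Each lane: (a >> 4) | (b << 28) = 0x1000_000F.
///     let r = _mm512_shrdi_epi32::<4>(a, b);
///     let expected = _mm512_set1_epi32(0x1000_000F);
///     assert_eq!(_mm512_cmpeq_epi32_mask(r, expected), 0xFFFF);
/// }
/// ```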
2060#[inline]
2061#[target_feature(enable = "avx512vbmi2")]
2062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2064#[rustc_legacy_const_generics(2)]
2065pub unsafe fn _mm512_shrdi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
2066 static_assert_uimm_bits!(IMM8, 8);
    transmute(vpshrdvd(
        a.as_i32x16(),
        b.as_i32x16(),
        _mm512_set1_epi32(IMM8).as_i32x16(),
    ))
2072}
2073
2074/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2075///
2076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi32&expand=5103)
2077#[inline]
2078#[target_feature(enable = "avx512vbmi2")]
2079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2081#[rustc_legacy_const_generics(4)]
2082pub unsafe fn _mm512_mask_shrdi_epi32<const IMM8: i32>(
2083 src: __m512i,
2084 k: __mmask16,
2085 a: __m512i,
2086 b: __m512i,
2087) -> __m512i {
2088 static_assert_uimm_bits!(IMM8, 8);
    let shf: i32x16 = vpshrdvd(
        a.as_i32x16(),
        b.as_i32x16(),
        _mm512_set1_epi32(IMM8).as_i32x16(),
    );
    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
2095}
2096
2097/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2098///
2099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi32&expand=5104)
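///
/// Illustrative sketch (not from Intel's reference) of the zeromask behaviour: with an all-zero
/// mask every element of the result is zeroed, whatever the shifted values are.
///
/// ```ignore
/// // assumes an AVX-512 VBMI2 capable target
/// let a = _mm512_set1_epi32(4);
/// let b = _mm512_set1_epi32(8);
/// let r = _mm512_maskz_shrdi_epi32::<1>(0, a, b);
/// // r == _mm512_setzero_si512()
/// ```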
2100#[inline]
2101#[target_feature(enable = "avx512vbmi2")]
2102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2104#[rustc_legacy_const_generics(3)]
2105pub unsafe fn _mm512_maskz_shrdi_epi32<const IMM8: i32>(
2106 k: __mmask16,
2107 a: __m512i,
2108 b: __m512i,
2109) -> __m512i {
2110 static_assert_uimm_bits!(IMM8, 8);
    let shf: i32x16 = vpshrdvd(
        a.as_i32x16(),
        b.as_i32x16(),
        _mm512_set1_epi32(IMM8).as_i32x16(),
    );
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, shf, zero))
2118}
2119
2120/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
2121///
2122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi32&expand=5102)
2123#[inline]
2124#[target_feature(enable = "avx512vbmi2,avx512vl")]
2125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2127#[rustc_legacy_const_generics(2)]
2128pub unsafe fn _mm256_shrdi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2129 static_assert_uimm_bits!(IMM8, 8);
    transmute(vpshrdvd256(
        a.as_i32x8(),
        b.as_i32x8(),
        _mm256_set1_epi32(IMM8).as_i32x8(),
    ))
2135}
2136
2137/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2138///
2139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi32&expand=5100)
2140#[inline]
2141#[target_feature(enable = "avx512vbmi2,avx512vl")]
2142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2144#[rustc_legacy_const_generics(4)]
2145pub unsafe fn _mm256_mask_shrdi_epi32<const IMM8: i32>(
2146 src: __m256i,
2147 k: __mmask8,
2148 a: __m256i,
2149 b: __m256i,
2150) -> __m256i {
2151 static_assert_uimm_bits!(IMM8, 8);
    let shf: i32x8 = vpshrdvd256(
        a.as_i32x8(),
        b.as_i32x8(),
        _mm256_set1_epi32(IMM8).as_i32x8(),
    );
    transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
2158}
2159
2160/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2161///
2162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi32&expand=5101)
2163#[inline]
2164#[target_feature(enable = "avx512vbmi2,avx512vl")]
2165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2167#[rustc_legacy_const_generics(3)]
2168pub unsafe fn _mm256_maskz_shrdi_epi32<const IMM8: i32>(
2169 k: __mmask8,
2170 a: __m256i,
2171 b: __m256i,
2172) -> __m256i {
2173 static_assert_uimm_bits!(IMM8, 8);
    let shf: i32x8 = vpshrdvd256(
        a.as_i32x8(),
        b.as_i32x8(),
        _mm256_set1_epi32(IMM8).as_i32x8(),
    );
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, shf, zero))
2181}
2182
2183/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
2184///
2185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi32&expand=5099)
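///
/// Illustrative sketch (not from Intel's reference): passing the same vector for both operands
/// turns the double shift into a per-lane rotate right.
///
/// ```ignore
/// // assumes an AVX-512 VBMI2 + VL capable target
/// let a = _mm_set1_epi32(4);
/// let r = _mm_shrdi_epi32::<1>(a, a); // rotate each 32-bit lane right by 1
/// // r == _mm_set1_epi32(2)
/// ```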
2186#[inline]
2187#[target_feature(enable = "avx512vbmi2,avx512vl")]
2188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2190#[rustc_legacy_const_generics(2)]
2191pub unsafe fn _mm_shrdi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
2192 static_assert_uimm_bits!(IMM8, 8);
    transmute(vpshrdvd128(
        a.as_i32x4(),
        b.as_i32x4(),
        _mm_set1_epi32(IMM8).as_i32x4(),
    ))
2198}
2199
2200/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2201///
2202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi32&expand=5097)
2203#[inline]
2204#[target_feature(enable = "avx512vbmi2,avx512vl")]
2205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2207#[rustc_legacy_const_generics(4)]
2208pub unsafe fn _mm_mask_shrdi_epi32<const IMM8: i32>(
2209 src: __m128i,
2210 k: __mmask8,
2211 a: __m128i,
2212 b: __m128i,
2213) -> __m128i {
2214 static_assert_uimm_bits!(IMM8, 8);
    let shf: i32x4 = vpshrdvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(IMM8).as_i32x4());
    transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
2217}
2218
2219/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2220///
2221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi32&expand=5098)
2222#[inline]
2223#[target_feature(enable = "avx512vbmi2,avx512vl")]
2224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2226#[rustc_legacy_const_generics(3)]
2227pub unsafe fn _mm_maskz_shrdi_epi32<const IMM8: i32>(
2228 k: __mmask8,
2229 a: __m128i,
2230 b: __m128i,
2231) -> __m128i {
2232 static_assert_uimm_bits!(IMM8, 8);
    let shf: i32x4 = vpshrdvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(IMM8).as_i32x4());
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, shf, zero))
2236}
2237
2238/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
2239///
2240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi16&expand=5096)
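///
/// Illustrative sketch (not from Intel's reference): with identical operands the double shift is
/// a per-lane rotate right on 16-bit lanes.
///
/// ```ignore
/// // assumes an AVX-512 VBMI2 capable target
/// let a = _mm512_set1_epi16(4);
/// let r = _mm512_shrdi_epi16::<1>(a, a); // rotate each 16-bit lane right by 1
/// // r == _mm512_set1_epi16(2)
/// ```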
2241#[inline]
2242#[target_feature(enable = "avx512vbmi2")]
2243#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2244#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2245#[rustc_legacy_const_generics(2)]
2246pub unsafe fn _mm512_shrdi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
2247 static_assert_uimm_bits!(IMM8, 8);
2248 let imm8: i16 = IMM8 as i16;
2249 assert!(matches!(imm8, 0..=255));
    transmute(vpshrdvw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_set1_epi16(imm8).as_i16x32(),
    ))
2255}
2256
2257/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2258///
2259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi16&expand=5094)
2260#[inline]
2261#[target_feature(enable = "avx512vbmi2")]
2262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2263#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2264#[rustc_legacy_const_generics(4)]
2265pub unsafe fn _mm512_mask_shrdi_epi16<const IMM8: i32>(
2266 src: __m512i,
2267 k: __mmask32,
2268 a: __m512i,
2269 b: __m512i,
2270) -> __m512i {
2271 static_assert_uimm_bits!(IMM8, 8);
2272 let imm8: i16 = IMM8 as i16;
2273 assert!(matches!(imm8, 0..=255));
    let shf: i16x32 = vpshrdvw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_set1_epi16(imm8).as_i16x32(),
    );
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
2280}
2281
2282/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2283///
2284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi16&expand=5095)
2285#[inline]
2286#[target_feature(enable = "avx512vbmi2")]
2287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2288#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2289#[rustc_legacy_const_generics(3)]
2290pub unsafe fn _mm512_maskz_shrdi_epi16<const IMM8: i32>(
2291 k: __mmask32,
2292 a: __m512i,
2293 b: __m512i,
2294) -> __m512i {
2295 static_assert_uimm_bits!(IMM8, 8);
2296 let imm8: i16 = IMM8 as i16;
2297 assert!(matches!(imm8, 0..=255));
    let shf: i16x32 = vpshrdvw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_set1_epi16(imm8).as_i16x32(),
    );
    let zero: i16x32 = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, shf, zero))
2305}
2306
2307/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
2308///
2309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi16&expand=5093)
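///
/// Illustrative sketch (not from Intel's reference): as with the 512-bit form, identical
/// operands give a per-lane rotate right.
///
/// ```ignore
/// // assumes an AVX-512 VBMI2 + VL capable target
/// let a = _mm256_set1_epi16(4);
/// let r = _mm256_shrdi_epi16::<1>(a, a); // rotate each 16-bit lane right by 1
/// // r == _mm256_set1_epi16(2)
/// ```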
2310#[inline]
2311#[target_feature(enable = "avx512vbmi2,avx512vl")]
2312#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2313#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2314#[rustc_legacy_const_generics(2)]
2315pub unsafe fn _mm256_shrdi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2316 static_assert_uimm_bits!(IMM8, 8);
2317 let imm8: i16 = IMM8 as i16;
2318 assert!(matches!(imm8, 0..=255));
    transmute(vpshrdvw256(
        a.as_i16x16(),
        b.as_i16x16(),
        _mm256_set1_epi16(imm8).as_i16x16(),
    ))
2324}
2325
2326/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2327///
2328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi16&expand=5091)
2329#[inline]
2330#[target_feature(enable = "avx512vbmi2,avx512vl")]
2331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2332#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2333#[rustc_legacy_const_generics(4)]
2334pub unsafe fn _mm256_mask_shrdi_epi16<const IMM8: i32>(
2335 src: __m256i,
2336 k: __mmask16,
2337 a: __m256i,
2338 b: __m256i,
2339) -> __m256i {
2340 static_assert_uimm_bits!(IMM8, 8);
2341 let imm8: i16 = IMM8 as i16;
2342 assert!(matches!(imm8, 0..=255));
    let shf: i16x16 = vpshrdvw256(
        a.as_i16x16(),
        b.as_i16x16(),
        _mm256_set1_epi16(imm8).as_i16x16(),
    );
    transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
2349}
2350
2351/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2352///
2353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi16&expand=5092)
2354#[inline]
2355#[target_feature(enable = "avx512vbmi2,avx512vl")]
2356#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2357#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2358#[rustc_legacy_const_generics(3)]
2359pub unsafe fn _mm256_maskz_shrdi_epi16<const IMM8: i32>(
2360 k: __mmask16,
2361 a: __m256i,
2362 b: __m256i,
2363) -> __m256i {
2364 static_assert_uimm_bits!(IMM8, 8);
2365 let imm8: i16 = IMM8 as i16;
    let shf: i16x16 = vpshrdvw256(
        a.as_i16x16(),
        b.as_i16x16(),
        _mm256_set1_epi16(imm8).as_i16x16(),
    );
    let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, shf, zero))
2373}
2374
2375/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
2376///
2377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi16&expand=5090)
2378#[inline]
2379#[target_feature(enable = "avx512vbmi2,avx512vl")]
2380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2381#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2382#[rustc_legacy_const_generics(2)]
2383pub unsafe fn _mm_shrdi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
2384 static_assert_uimm_bits!(IMM8, 8);
2385 let imm8: i16 = IMM8 as i16;
    transmute(vpshrdvw128(
        a.as_i16x8(),
        b.as_i16x8(),
        _mm_set1_epi16(imm8).as_i16x8(),
    ))
2391}
2392
2393/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2394///
2395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi16&expand=5088)
2396#[inline]
2397#[target_feature(enable = "avx512vbmi2,avx512vl")]
2398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2399#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2400#[rustc_legacy_const_generics(4)]
2401pub unsafe fn _mm_mask_shrdi_epi16<const IMM8: i32>(
2402 src: __m128i,
2403 k: __mmask8,
2404 a: __m128i,
2405 b: __m128i,
2406) -> __m128i {
2407 static_assert_uimm_bits!(IMM8, 8);
2408 let imm8: i16 = IMM8 as i16;
    let shf: i16x8 = vpshrdvw128(a.as_i16x8(), b.as_i16x8(), _mm_set1_epi16(imm8).as_i16x8());
    transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
2411}
2412
2413/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2414///
2415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi16&expand=5089)
2416#[inline]
2417#[target_feature(enable = "avx512vbmi2,avx512vl")]
2418#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2419#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2420#[rustc_legacy_const_generics(3)]
2421pub unsafe fn _mm_maskz_shrdi_epi16<const IMM8: i32>(
2422 k: __mmask8,
2423 a: __m128i,
2424 b: __m128i,
2425) -> __m128i {
2426 static_assert_uimm_bits!(IMM8, 8);
2427 let imm8: i16 = IMM8 as i16;
    let shf: i16x8 = vpshrdvw128(a.as_i16x8(), b.as_i16x8(), _mm_set1_epi16(imm8).as_i16x8());
    let zero: i16x8 = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, shf, zero))
2431}
2432
2433#[allow(improper_ctypes)]
2434extern "C" {
2435 #[link_name = "llvm.x86.avx512.mask.compress.store.w.512"]
2436 fn vcompressstorew(mem: *mut i8, data: i16x32, mask: u32);
2437 #[link_name = "llvm.x86.avx512.mask.compress.store.w.256"]
2438 fn vcompressstorew256(mem: *mut i8, data: i16x16, mask: u16);
2439 #[link_name = "llvm.x86.avx512.mask.compress.store.w.128"]
2440 fn vcompressstorew128(mem: *mut i8, data: i16x8, mask: u8);
2441
2442 #[link_name = "llvm.x86.avx512.mask.compress.store.b.512"]
2443 fn vcompressstoreb(mem: *mut i8, data: i8x64, mask: u64);
2444 #[link_name = "llvm.x86.avx512.mask.compress.store.b.256"]
2445 fn vcompressstoreb256(mem: *mut i8, data: i8x32, mask: u32);
2446 #[link_name = "llvm.x86.avx512.mask.compress.store.b.128"]
2447 fn vcompressstoreb128(mem: *mut i8, data: i8x16, mask: u16);
2448
2449 #[link_name = "llvm.x86.avx512.mask.compress.w.512"]
2450 fn vpcompressw(a: i16x32, src: i16x32, mask: u32) -> i16x32;
2451 #[link_name = "llvm.x86.avx512.mask.compress.w.256"]
2452 fn vpcompressw256(a: i16x16, src: i16x16, mask: u16) -> i16x16;
2453 #[link_name = "llvm.x86.avx512.mask.compress.w.128"]
2454 fn vpcompressw128(a: i16x8, src: i16x8, mask: u8) -> i16x8;
2455
2456 #[link_name = "llvm.x86.avx512.mask.compress.b.512"]
2457 fn vpcompressb(a: i8x64, src: i8x64, mask: u64) -> i8x64;
2458 #[link_name = "llvm.x86.avx512.mask.compress.b.256"]
2459 fn vpcompressb256(a: i8x32, src: i8x32, mask: u32) -> i8x32;
2460 #[link_name = "llvm.x86.avx512.mask.compress.b.128"]
2461 fn vpcompressb128(a: i8x16, src: i8x16, mask: u16) -> i8x16;
2462
2463 #[link_name = "llvm.x86.avx512.mask.expand.w.512"]
2464 fn vpexpandw(a: i16x32, src: i16x32, mask: u32) -> i16x32;
2465 #[link_name = "llvm.x86.avx512.mask.expand.w.256"]
2466 fn vpexpandw256(a: i16x16, src: i16x16, mask: u16) -> i16x16;
2467 #[link_name = "llvm.x86.avx512.mask.expand.w.128"]
2468 fn vpexpandw128(a: i16x8, src: i16x8, mask: u8) -> i16x8;
2469
2470 #[link_name = "llvm.x86.avx512.mask.expand.b.512"]
2471 fn vpexpandb(a: i8x64, src: i8x64, mask: u64) -> i8x64;
2472 #[link_name = "llvm.x86.avx512.mask.expand.b.256"]
2473 fn vpexpandb256(a: i8x32, src: i8x32, mask: u32) -> i8x32;
2474 #[link_name = "llvm.x86.avx512.mask.expand.b.128"]
2475 fn vpexpandb128(a: i8x16, src: i8x16, mask: u16) -> i8x16;
2476
2477 #[link_name = "llvm.fshl.v8i64"]
2478 fn vpshldvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8;
2479 #[link_name = "llvm.fshl.v4i64"]
2480 fn vpshldvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4;
2481 #[link_name = "llvm.fshl.v2i64"]
2482 fn vpshldvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2;
2483 #[link_name = "llvm.fshl.v16i32"]
2484 fn vpshldvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16;
2485 #[link_name = "llvm.fshl.v8i32"]
2486 fn vpshldvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8;
2487 #[link_name = "llvm.fshl.v4i32"]
2488 fn vpshldvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
2489 #[link_name = "llvm.fshl.v32i16"]
2490 fn vpshldvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32;
2491 #[link_name = "llvm.fshl.v16i16"]
2492 fn vpshldvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16;
2493 #[link_name = "llvm.fshl.v8i16"]
2494 fn vpshldvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8;
2495
2496 #[link_name = "llvm.fshr.v8i64"]
2497 fn vpshrdvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8;
2498 #[link_name = "llvm.fshr.v4i64"]
2499 fn vpshrdvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4;
2500 #[link_name = "llvm.fshr.v2i64"]
2501 fn vpshrdvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2;
2502 #[link_name = "llvm.fshr.v16i32"]
2503 fn vpshrdvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16;
2504 #[link_name = "llvm.fshr.v8i32"]
2505 fn vpshrdvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8;
2506 #[link_name = "llvm.fshr.v4i32"]
2507 fn vpshrdvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
2508 #[link_name = "llvm.fshr.v32i16"]
2509 fn vpshrdvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32;
2510 #[link_name = "llvm.fshr.v16i16"]
2511 fn vpshrdvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16;
2512 #[link_name = "llvm.fshr.v8i16"]
2513 fn vpshrdvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8;
2514}
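
// Note (illustrative, not part of the original source): the double-shift intrinsics above are
// lowered through LLVM's generic funnel-shift intrinsics (`llvm.fshl.*` / `llvm.fshr.*`). For a
// single 64-bit lane, the right funnel shift is equivalent to the following plain-Rust sketch:
//
//     fn fshr64(hi: u64, lo: u64, c: u32) -> u64 {
//         let c = c & 63; // the shift amount is taken modulo the lane width
//         if c == 0 { lo } else { (lo >> c) | (hi << (64 - c)) }
//     }
//
// i.e. the two lanes are concatenated with `hi` in the upper half, the combined value is shifted
// right, and the low half is kept; `fshl` is the mirror image that keeps the high half.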
2515
2516#[cfg(test)]
2517mod tests {
2518
2519 use stdarch_test::simd_test;
2520
2521 use crate::core_arch::x86::*;
2522 use crate::hint::black_box;
2523
2524 #[simd_test(enable = "avx512vbmi2")]
2525 unsafe fn test_mm512_mask_compress_epi16() {
2526 let src = _mm512_set1_epi16(200);
2527 #[rustfmt::skip]
2528 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2529 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2530 let r = _mm512_mask_compress_epi16(src, 0b01010101_01010101_01010101_01010101, a);
2531 #[rustfmt::skip]
2532 let e = _mm512_set_epi16(
2533 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200,
2534 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2535 );
2536 assert_eq_m512i(r, e);
2537 }
2538
2539 #[simd_test(enable = "avx512vbmi2")]
2540 unsafe fn test_mm512_maskz_compress_epi16() {
2541 #[rustfmt::skip]
2542 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2543 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2544 let r = _mm512_maskz_compress_epi16(0b01010101_01010101_01010101_01010101, a);
2545 #[rustfmt::skip]
2546 let e = _mm512_set_epi16(
2547 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2548 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2549 );
2550 assert_eq_m512i(r, e);
2551 }
2552
2553 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2554 unsafe fn test_mm256_mask_compress_epi16() {
2555 let src = _mm256_set1_epi16(200);
2556 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2557 let r = _mm256_mask_compress_epi16(src, 0b01010101_01010101, a);
2558 let e = _mm256_set_epi16(
2559 200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
2560 );
2561 assert_eq_m256i(r, e);
2562 }
2563
2564 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2565 unsafe fn test_mm256_maskz_compress_epi16() {
2566 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2567 let r = _mm256_maskz_compress_epi16(0b01010101_01010101, a);
2568 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
2569 assert_eq_m256i(r, e);
2570 }
2571
2572 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2573 unsafe fn test_mm_mask_compress_epi16() {
2574 let src = _mm_set1_epi16(200);
2575 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2576 let r = _mm_mask_compress_epi16(src, 0b01010101, a);
2577 let e = _mm_set_epi16(200, 200, 200, 200, 1, 3, 5, 7);
2578 assert_eq_m128i(r, e);
2579 }
2580
2581 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2582 unsafe fn test_mm_maskz_compress_epi16() {
2583 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2584 let r = _mm_maskz_compress_epi16(0b01010101, a);
2585 let e = _mm_set_epi16(0, 0, 0, 0, 1, 3, 5, 7);
2586 assert_eq_m128i(r, e);
2587 }
2588
2589 #[simd_test(enable = "avx512vbmi2")]
2590 unsafe fn test_mm512_mask_compress_epi8() {
2591 let src = _mm512_set1_epi8(100);
2592 #[rustfmt::skip]
2593 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2594 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2595 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2596 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2597 let r = _mm512_mask_compress_epi8(
2598 src,
2599 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2600 a,
2601 );
2602 #[rustfmt::skip]
2603 let e = _mm512_set_epi8(
2604 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2605 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2606 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2607 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
2608 );
2609 assert_eq_m512i(r, e);
2610 }
2611
2612 #[simd_test(enable = "avx512vbmi2")]
2613 unsafe fn test_mm512_maskz_compress_epi8() {
2614 #[rustfmt::skip]
2615 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2616 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2617 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2618 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2619 let r = _mm512_maskz_compress_epi8(
2620 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2621 a,
2622 );
2623 #[rustfmt::skip]
2624 let e = _mm512_set_epi8(
2625 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2626 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2627 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2628 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
2629 );
2630 assert_eq_m512i(r, e);
2631 }
2632
2633 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2634 unsafe fn test_mm256_mask_compress_epi8() {
2635 let src = _mm256_set1_epi8(100);
2636 #[rustfmt::skip]
2637 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2638 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2639 let r = _mm256_mask_compress_epi8(src, 0b01010101_01010101_01010101_01010101, a);
2640 #[rustfmt::skip]
2641 let e = _mm256_set_epi8(
2642 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2643 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2644 );
2645 assert_eq_m256i(r, e);
2646 }
2647
2648 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2649 unsafe fn test_mm256_maskz_compress_epi8() {
2650 #[rustfmt::skip]
2651 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2652 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2653 let r = _mm256_maskz_compress_epi8(0b01010101_01010101_01010101_01010101, a);
2654 #[rustfmt::skip]
2655 let e = _mm256_set_epi8(
2656 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2657 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2658 );
2659 assert_eq_m256i(r, e);
2660 }
2661
2662 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2663 unsafe fn test_mm_mask_compress_epi8() {
2664 let src = _mm_set1_epi8(100);
2665 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2666 let r = _mm_mask_compress_epi8(src, 0b01010101_01010101, a);
2667 let e = _mm_set_epi8(
2668 100, 100, 100, 100, 100, 100, 100, 100, 1, 3, 5, 7, 9, 11, 13, 15,
2669 );
2670 assert_eq_m128i(r, e);
2671 }
2672
2673 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2674 unsafe fn test_mm_maskz_compress_epi8() {
2675 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2676 let r = _mm_maskz_compress_epi8(0b01010101_01010101, a);
2677 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
2678 assert_eq_m128i(r, e);
2679 }
2680
2681 #[simd_test(enable = "avx512vbmi2")]
2682 unsafe fn test_mm512_mask_expand_epi16() {
2683 let src = _mm512_set1_epi16(200);
2684 #[rustfmt::skip]
2685 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2686 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2687 let r = _mm512_mask_expand_epi16(src, 0b01010101_01010101_01010101_01010101, a);
2688 #[rustfmt::skip]
2689 let e = _mm512_set_epi16(
2690 200, 16, 200, 17, 200, 18, 200, 19, 200, 20, 200, 21, 200, 22, 200, 23,
2691 200, 24, 200, 25, 200, 26, 200, 27, 200, 28, 200, 29, 200, 30, 200, 31,
2692 );
2693 assert_eq_m512i(r, e);
2694 }
2695
2696 #[simd_test(enable = "avx512vbmi2")]
2697 unsafe fn test_mm512_maskz_expand_epi16() {
2698 #[rustfmt::skip]
2699 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2700 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2701 let r = _mm512_maskz_expand_epi16(0b01010101_01010101_01010101_01010101, a);
2702 #[rustfmt::skip]
2703 let e = _mm512_set_epi16(0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23,
2704 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31);
2705 assert_eq_m512i(r, e);
2706 }
2707
2708 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2709 unsafe fn test_mm256_mask_expand_epi16() {
2710 let src = _mm256_set1_epi16(200);
2711 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2712 let r = _mm256_mask_expand_epi16(src, 0b01010101_01010101, a);
2713 let e = _mm256_set_epi16(
2714 200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
2715 );
2716 assert_eq_m256i(r, e);
2717 }
2718
2719 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2720 unsafe fn test_mm256_maskz_expand_epi16() {
2721 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2722 let r = _mm256_maskz_expand_epi16(0b01010101_01010101, a);
2723 let e = _mm256_set_epi16(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
2724 assert_eq_m256i(r, e);
2725 }
2726
2727 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2728 unsafe fn test_mm_mask_expand_epi16() {
2729 let src = _mm_set1_epi16(200);
2730 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2731 let r = _mm_mask_expand_epi16(src, 0b01010101, a);
2732 let e = _mm_set_epi16(200, 4, 200, 5, 200, 6, 200, 7);
2733 assert_eq_m128i(r, e);
2734 }
2735
2736 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2737 unsafe fn test_mm_maskz_expand_epi16() {
2738 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2739 let r = _mm_maskz_expand_epi16(0b01010101, a);
2740 let e = _mm_set_epi16(0, 4, 0, 5, 0, 6, 0, 7);
2741 assert_eq_m128i(r, e);
2742 }
2743
2744 #[simd_test(enable = "avx512vbmi2")]
2745 unsafe fn test_mm512_mask_expand_epi8() {
2746 let src = _mm512_set1_epi8(100);
2747 #[rustfmt::skip]
2748 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2749 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2750 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2751 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2752 let r = _mm512_mask_expand_epi8(
2753 src,
2754 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2755 a,
2756 );
2757 #[rustfmt::skip]
2758 let e = _mm512_set_epi8(
2759 100, 32, 100, 33, 100, 34, 100, 35, 100, 36, 100, 37, 100, 38, 100, 39,
2760 100, 40, 100, 41, 100, 42, 100, 43, 100, 44, 100, 45, 100, 46, 100, 47,
2761 100, 48, 100, 49, 100, 50, 100, 51, 100, 52, 100, 53, 100, 54, 100, 55,
2762 100, 56, 100, 57, 100, 58, 100, 59, 100, 60, 100, 61, 100, 62, 100, 63,
2763 );
2764 assert_eq_m512i(r, e);
2765 }
2766
2767 #[simd_test(enable = "avx512vbmi2")]
2768 unsafe fn test_mm512_maskz_expand_epi8() {
2769 #[rustfmt::skip]
2770 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2771 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2772 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2773 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2774 let r = _mm512_maskz_expand_epi8(
2775 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2776 a,
2777 );
2778 #[rustfmt::skip]
2779 let e = _mm512_set_epi8(
2780 0, 32, 0, 33, 0, 34, 0, 35, 0, 36, 0, 37, 0, 38, 0, 39,
2781 0, 40, 0, 41, 0, 42, 0, 43, 0, 44, 0, 45, 0, 46, 0, 47,
2782 0, 48, 0, 49, 0, 50, 0, 51, 0, 52, 0, 53, 0, 54, 0, 55,
2783 0, 56, 0, 57, 0, 58, 0, 59, 0, 60, 0, 61, 0, 62, 0, 63,
2784 );
2785 assert_eq_m512i(r, e);
2786 }
2787
2788 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2789 unsafe fn test_mm256_mask_expand_epi8() {
2790 let src = _mm256_set1_epi8(100);
2791 #[rustfmt::skip]
2792 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2793 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2794 let r = _mm256_mask_expand_epi8(src, 0b01010101_01010101_01010101_01010101, a);
2795 #[rustfmt::skip]
2796 let e = _mm256_set_epi8(
2797 100, 16, 100, 17, 100, 18, 100, 19, 100, 20, 100, 21, 100, 22, 100, 23,
2798 100, 24, 100, 25, 100, 26, 100, 27, 100, 28, 100, 29, 100, 30, 100, 31,
2799 );
2800 assert_eq_m256i(r, e);
2801 }
2802
2803 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2804 unsafe fn test_mm256_maskz_expand_epi8() {
2805 #[rustfmt::skip]
2806 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2807 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2808 let r = _mm256_maskz_expand_epi8(0b01010101_01010101_01010101_01010101, a);
2809 #[rustfmt::skip]
2810 let e = _mm256_set_epi8(
2811 0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23,
2812 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31,
2813 );
2814 assert_eq_m256i(r, e);
2815 }
2816
2817 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2818 unsafe fn test_mm_mask_expand_epi8() {
2819 let src = _mm_set1_epi8(100);
2820 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2821 let r = _mm_mask_expand_epi8(src, 0b01010101_01010101, a);
2822 let e = _mm_set_epi8(
2823 100, 8, 100, 9, 100, 10, 100, 11, 100, 12, 100, 13, 100, 14, 100, 15,
2824 );
2825 assert_eq_m128i(r, e);
2826 }
2827
2828 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2829 unsafe fn test_mm_maskz_expand_epi8() {
2830 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2831 let r = _mm_maskz_expand_epi8(0b01010101_01010101, a);
2832 let e = _mm_set_epi8(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
2833 assert_eq_m128i(r, e);
2834 }
2835
2836 #[simd_test(enable = "avx512vbmi2")]
2837 unsafe fn test_mm512_shldv_epi64() {
2838 let a = _mm512_set1_epi64(1);
2839 let b = _mm512_set1_epi64(1 << 63);
2840 let c = _mm512_set1_epi64(2);
2841 let r = _mm512_shldv_epi64(a, b, c);
2842 let e = _mm512_set1_epi64(6);
2843 assert_eq_m512i(r, e);
2844 }
2845
2846 #[simd_test(enable = "avx512vbmi2")]
2847 unsafe fn test_mm512_mask_shldv_epi64() {
2848 let a = _mm512_set1_epi64(1);
2849 let b = _mm512_set1_epi64(1 << 63);
2850 let c = _mm512_set1_epi64(2);
2851 let r = _mm512_mask_shldv_epi64(a, 0, b, c);
2852 assert_eq_m512i(r, a);
2853 let r = _mm512_mask_shldv_epi64(a, 0b11111111, b, c);
2854 let e = _mm512_set1_epi64(6);
2855 assert_eq_m512i(r, e);
2856 }
2857
2858 #[simd_test(enable = "avx512vbmi2")]
2859 unsafe fn test_mm512_maskz_shldv_epi64() {
2860 let a = _mm512_set1_epi64(1);
2861 let b = _mm512_set1_epi64(1 << 63);
2862 let c = _mm512_set1_epi64(2);
2863 let r = _mm512_maskz_shldv_epi64(0, a, b, c);
2864 assert_eq_m512i(r, _mm512_setzero_si512());
2865 let r = _mm512_maskz_shldv_epi64(0b11111111, a, b, c);
2866 let e = _mm512_set1_epi64(6);
2867 assert_eq_m512i(r, e);
2868 }
2869
2870 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2871 unsafe fn test_mm256_shldv_epi64() {
2872 let a = _mm256_set1_epi64x(1);
2873 let b = _mm256_set1_epi64x(1 << 63);
2874 let c = _mm256_set1_epi64x(2);
2875 let r = _mm256_shldv_epi64(a, b, c);
2876 let e = _mm256_set1_epi64x(6);
2877 assert_eq_m256i(r, e);
2878 }
2879
2880 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2881 unsafe fn test_mm256_mask_shldv_epi64() {
2882 let a = _mm256_set1_epi64x(1);
2883 let b = _mm256_set1_epi64x(1 << 63);
2884 let c = _mm256_set1_epi64x(2);
2885 let r = _mm256_mask_shldv_epi64(a, 0, b, c);
2886 assert_eq_m256i(r, a);
2887 let r = _mm256_mask_shldv_epi64(a, 0b00001111, b, c);
2888 let e = _mm256_set1_epi64x(6);
2889 assert_eq_m256i(r, e);
2890 }
2891
2892 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2893 unsafe fn test_mm256_maskz_shldv_epi64() {
2894 let a = _mm256_set1_epi64x(1);
2895 let b = _mm256_set1_epi64x(1 << 63);
2896 let c = _mm256_set1_epi64x(2);
2897 let r = _mm256_maskz_shldv_epi64(0, a, b, c);
2898 assert_eq_m256i(r, _mm256_setzero_si256());
2899 let r = _mm256_maskz_shldv_epi64(0b00001111, a, b, c);
2900 let e = _mm256_set1_epi64x(6);
2901 assert_eq_m256i(r, e);
2902 }
2903
2904 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2905 unsafe fn test_mm_shldv_epi64() {
2906 let a = _mm_set1_epi64x(1);
2907 let b = _mm_set1_epi64x(1 << 63);
2908 let c = _mm_set1_epi64x(2);
2909 let r = _mm_shldv_epi64(a, b, c);
2910 let e = _mm_set1_epi64x(6);
2911 assert_eq_m128i(r, e);
2912 }
2913
2914 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2915 unsafe fn test_mm_mask_shldv_epi64() {
2916 let a = _mm_set1_epi64x(1);
2917 let b = _mm_set1_epi64x(1 << 63);
2918 let c = _mm_set1_epi64x(2);
2919 let r = _mm_mask_shldv_epi64(a, 0, b, c);
2920 assert_eq_m128i(r, a);
2921 let r = _mm_mask_shldv_epi64(a, 0b00000011, b, c);
2922 let e = _mm_set1_epi64x(6);
2923 assert_eq_m128i(r, e);
2924 }
2925
2926 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2927 unsafe fn test_mm_maskz_shldv_epi64() {
2928 let a = _mm_set1_epi64x(1);
2929 let b = _mm_set1_epi64x(1 << 63);
2930 let c = _mm_set1_epi64x(2);
2931 let r = _mm_maskz_shldv_epi64(0, a, b, c);
2932 assert_eq_m128i(r, _mm_setzero_si128());
2933 let r = _mm_maskz_shldv_epi64(0b00000011, a, b, c);
2934 let e = _mm_set1_epi64x(6);
2935 assert_eq_m128i(r, e);
2936 }
2937
2938 #[simd_test(enable = "avx512vbmi2")]
2939 unsafe fn test_mm512_shldv_epi32() {
2940 let a = _mm512_set1_epi32(1);
2941 let b = _mm512_set1_epi32(1 << 31);
2942 let c = _mm512_set1_epi32(2);
2943 let r = _mm512_shldv_epi32(a, b, c);
2944 let e = _mm512_set1_epi32(6);
2945 assert_eq_m512i(r, e);
2946 }
2947
2948 #[simd_test(enable = "avx512vbmi2")]
2949 unsafe fn test_mm512_mask_shldv_epi32() {
2950 let a = _mm512_set1_epi32(1);
2951 let b = _mm512_set1_epi32(1 << 31);
2952 let c = _mm512_set1_epi32(2);
2953 let r = _mm512_mask_shldv_epi32(a, 0, b, c);
2954 assert_eq_m512i(r, a);
2955 let r = _mm512_mask_shldv_epi32(a, 0b11111111_11111111, b, c);
2956 let e = _mm512_set1_epi32(6);
2957 assert_eq_m512i(r, e);
2958 }
2959
2960 #[simd_test(enable = "avx512vbmi2")]
2961 unsafe fn test_mm512_maskz_shldv_epi32() {
2962 let a = _mm512_set1_epi32(1);
2963 let b = _mm512_set1_epi32(1 << 31);
2964 let c = _mm512_set1_epi32(2);
2965 let r = _mm512_maskz_shldv_epi32(0, a, b, c);
2966 assert_eq_m512i(r, _mm512_setzero_si512());
2967 let r = _mm512_maskz_shldv_epi32(0b11111111_11111111, a, b, c);
2968 let e = _mm512_set1_epi32(6);
2969 assert_eq_m512i(r, e);
2970 }
2971
2972 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2973 unsafe fn test_mm256_shldv_epi32() {
2974 let a = _mm256_set1_epi32(1);
2975 let b = _mm256_set1_epi32(1 << 31);
2976 let c = _mm256_set1_epi32(2);
2977 let r = _mm256_shldv_epi32(a, b, c);
2978 let e = _mm256_set1_epi32(6);
2979 assert_eq_m256i(r, e);
2980 }
2981
2982 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2983 unsafe fn test_mm256_mask_shldv_epi32() {
2984 let a = _mm256_set1_epi32(1);
2985 let b = _mm256_set1_epi32(1 << 31);
2986 let c = _mm256_set1_epi32(2);
2987 let r = _mm256_mask_shldv_epi32(a, 0, b, c);
2988 assert_eq_m256i(r, a);
2989 let r = _mm256_mask_shldv_epi32(a, 0b11111111, b, c);
2990 let e = _mm256_set1_epi32(6);
2991 assert_eq_m256i(r, e);
2992 }
2993
2994 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2995 unsafe fn test_mm256_maskz_shldv_epi32() {
2996 let a = _mm256_set1_epi32(1);
2997 let b = _mm256_set1_epi32(1 << 31);
2998 let c = _mm256_set1_epi32(2);
2999 let r = _mm256_maskz_shldv_epi32(0, a, b, c);
3000 assert_eq_m256i(r, _mm256_setzero_si256());
3001 let r = _mm256_maskz_shldv_epi32(0b11111111, a, b, c);
3002 let e = _mm256_set1_epi32(6);
3003 assert_eq_m256i(r, e);
3004 }
3005
3006 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3007 unsafe fn test_mm_shldv_epi32() {
3008 let a = _mm_set1_epi32(1);
3009 let b = _mm_set1_epi32(1 << 31);
3010 let c = _mm_set1_epi32(2);
3011 let r = _mm_shldv_epi32(a, b, c);
3012 let e = _mm_set1_epi32(6);
3013 assert_eq_m128i(r, e);
3014 }
3015
3016 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3017 unsafe fn test_mm_mask_shldv_epi32() {
3018 let a = _mm_set1_epi32(1);
3019 let b = _mm_set1_epi32(1 << 31);
3020 let c = _mm_set1_epi32(2);
3021 let r = _mm_mask_shldv_epi32(a, 0, b, c);
3022 assert_eq_m128i(r, a);
3023 let r = _mm_mask_shldv_epi32(a, 0b00001111, b, c);
3024 let e = _mm_set1_epi32(6);
3025 assert_eq_m128i(r, e);
3026 }
3027
3028 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3029 unsafe fn test_mm_maskz_shldv_epi32() {
3030 let a = _mm_set1_epi32(1);
3031 let b = _mm_set1_epi32(1 << 31);
3032 let c = _mm_set1_epi32(2);
3033 let r = _mm_maskz_shldv_epi32(0, a, b, c);
3034 assert_eq_m128i(r, _mm_setzero_si128());
3035 let r = _mm_maskz_shldv_epi32(0b00001111, a, b, c);
3036 let e = _mm_set1_epi32(6);
3037 assert_eq_m128i(r, e);
3038 }
3039
3040 #[simd_test(enable = "avx512vbmi2")]
3041 unsafe fn test_mm512_shldv_epi16() {
3042 let a = _mm512_set1_epi16(1);
3043 let b = _mm512_set1_epi16(1 << 15);
3044 let c = _mm512_set1_epi16(2);
3045 let r = _mm512_shldv_epi16(a, b, c);
3046 let e = _mm512_set1_epi16(6);
3047 assert_eq_m512i(r, e);
3048 }
3049
3050 #[simd_test(enable = "avx512vbmi2")]
3051 unsafe fn test_mm512_mask_shldv_epi16() {
3052 let a = _mm512_set1_epi16(1);
3053 let b = _mm512_set1_epi16(1 << 15);
3054 let c = _mm512_set1_epi16(2);
3055 let r = _mm512_mask_shldv_epi16(a, 0, b, c);
3056 assert_eq_m512i(r, a);
3057 let r = _mm512_mask_shldv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c);
3058 let e = _mm512_set1_epi16(6);
3059 assert_eq_m512i(r, e);
3060 }
3061
3062 #[simd_test(enable = "avx512vbmi2")]
3063 unsafe fn test_mm512_maskz_shldv_epi16() {
3064 let a = _mm512_set1_epi16(1);
3065 let b = _mm512_set1_epi16(1 << 15);
3066 let c = _mm512_set1_epi16(2);
3067 let r = _mm512_maskz_shldv_epi16(0, a, b, c);
3068 assert_eq_m512i(r, _mm512_setzero_si512());
3069 let r = _mm512_maskz_shldv_epi16(0b11111111_11111111_11111111_11111111, a, b, c);
3070 let e = _mm512_set1_epi16(6);
3071 assert_eq_m512i(r, e);
3072 }
3073
3074 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3075 unsafe fn test_mm256_shldv_epi16() {
3076 let a = _mm256_set1_epi16(1);
3077 let b = _mm256_set1_epi16(1 << 15);
3078 let c = _mm256_set1_epi16(2);
3079 let r = _mm256_shldv_epi16(a, b, c);
3080 let e = _mm256_set1_epi16(6);
3081 assert_eq_m256i(r, e);
3082 }
3083
3084 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3085 unsafe fn test_mm256_mask_shldv_epi16() {
3086 let a = _mm256_set1_epi16(1);
3087 let b = _mm256_set1_epi16(1 << 15);
3088 let c = _mm256_set1_epi16(2);
3089 let r = _mm256_mask_shldv_epi16(a, 0, b, c);
3090 assert_eq_m256i(r, a);
3091 let r = _mm256_mask_shldv_epi16(a, 0b11111111_11111111, b, c);
3092 let e = _mm256_set1_epi16(6);
3093 assert_eq_m256i(r, e);
3094 }
3095
3096 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3097 unsafe fn test_mm256_maskz_shldv_epi16() {
3098 let a = _mm256_set1_epi16(1);
3099 let b = _mm256_set1_epi16(1 << 15);
3100 let c = _mm256_set1_epi16(2);
3101 let r = _mm256_maskz_shldv_epi16(0, a, b, c);
3102 assert_eq_m256i(r, _mm256_setzero_si256());
3103 let r = _mm256_maskz_shldv_epi16(0b11111111_11111111, a, b, c);
3104 let e = _mm256_set1_epi16(6);
3105 assert_eq_m256i(r, e);
3106 }
3107
3108 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3109 unsafe fn test_mm_shldv_epi16() {
3110 let a = _mm_set1_epi16(1);
3111 let b = _mm_set1_epi16(1 << 15);
3112 let c = _mm_set1_epi16(2);
3113 let r = _mm_shldv_epi16(a, b, c);
3114 let e = _mm_set1_epi16(6);
3115 assert_eq_m128i(r, e);
3116 }
3117
3118 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3119 unsafe fn test_mm_mask_shldv_epi16() {
3120 let a = _mm_set1_epi16(1);
3121 let b = _mm_set1_epi16(1 << 15);
3122 let c = _mm_set1_epi16(2);
3123 let r = _mm_mask_shldv_epi16(a, 0, b, c);
3124 assert_eq_m128i(r, a);
3125 let r = _mm_mask_shldv_epi16(a, 0b11111111, b, c);
3126 let e = _mm_set1_epi16(6);
3127 assert_eq_m128i(r, e);
3128 }
3129
3130 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3131 unsafe fn test_mm_maskz_shldv_epi16() {
3132 let a = _mm_set1_epi16(1);
3133 let b = _mm_set1_epi16(1 << 15);
3134 let c = _mm_set1_epi16(2);
3135 let r = _mm_maskz_shldv_epi16(0, a, b, c);
3136 assert_eq_m128i(r, _mm_setzero_si128());
3137 let r = _mm_maskz_shldv_epi16(0b11111111, a, b, c);
3138 let e = _mm_set1_epi16(6);
3139 assert_eq_m128i(r, e);
3140 }
3141
3142 #[simd_test(enable = "avx512vbmi2")]
3143 unsafe fn test_mm512_shrdv_epi64() {
3144 let a = _mm512_set1_epi64(8);
3145 let b = _mm512_set1_epi64(2);
3146 let c = _mm512_set1_epi64(1);
3147 let r = _mm512_shrdv_epi64(a, b, c);
3148 let e = _mm512_set1_epi64(1);
3149 assert_eq_m512i(r, e);
3150 }
3151
3152 #[simd_test(enable = "avx512vbmi2")]
3153 unsafe fn test_mm512_mask_shrdv_epi64() {
3154 let a = _mm512_set1_epi64(8);
3155 let b = _mm512_set1_epi64(2);
3156 let c = _mm512_set1_epi64(1);
3157 let r = _mm512_mask_shrdv_epi64(a, 0, b, c);
3158 assert_eq_m512i(r, a);
3159 let r = _mm512_mask_shrdv_epi64(a, 0b11111111, b, c);
3160 let e = _mm512_set1_epi64(1);
3161 assert_eq_m512i(r, e);
3162 }
3163
3164 #[simd_test(enable = "avx512vbmi2")]
3165 unsafe fn test_mm512_maskz_shrdv_epi64() {
3166 let a = _mm512_set1_epi64(8);
3167 let b = _mm512_set1_epi64(2);
3168 let c = _mm512_set1_epi64(1);
3169 let r = _mm512_maskz_shrdv_epi64(0, a, b, c);
3170 assert_eq_m512i(r, _mm512_setzero_si512());
3171 let r = _mm512_maskz_shrdv_epi64(0b11111111, a, b, c);
3172 let e = _mm512_set1_epi64(1);
3173 assert_eq_m512i(r, e);
3174 }
3175
3176 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3177 unsafe fn test_mm256_shrdv_epi64() {
3178 let a = _mm256_set1_epi64x(8);
3179 let b = _mm256_set1_epi64x(2);
3180 let c = _mm256_set1_epi64x(1);
3181 let r = _mm256_shrdv_epi64(a, b, c);
3182 let e = _mm256_set1_epi64x(1);
3183 assert_eq_m256i(r, e);
3184 }
3185
3186 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3187 unsafe fn test_mm256_mask_shrdv_epi64() {
3188 let a = _mm256_set1_epi64x(8);
3189 let b = _mm256_set1_epi64x(2);
3190 let c = _mm256_set1_epi64x(1);
3191 let r = _mm256_mask_shrdv_epi64(a, 0, b, c);
3192 assert_eq_m256i(r, a);
3193 let r = _mm256_mask_shrdv_epi64(a, 0b00001111, b, c);
3194 let e = _mm256_set1_epi64x(1);
3195 assert_eq_m256i(r, e);
3196 }
3197
3198 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3199 unsafe fn test_mm256_maskz_shrdv_epi64() {
3200 let a = _mm256_set1_epi64x(8);
3201 let b = _mm256_set1_epi64x(2);
3202 let c = _mm256_set1_epi64x(1);
3203 let r = _mm256_maskz_shrdv_epi64(0, a, b, c);
3204 assert_eq_m256i(r, _mm256_setzero_si256());
3205 let r = _mm256_maskz_shrdv_epi64(0b00001111, a, b, c);
3206 let e = _mm256_set1_epi64x(1);
3207 assert_eq_m256i(r, e);
3208 }
3209
3210 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3211 unsafe fn test_mm_shrdv_epi64() {
3212 let a = _mm_set1_epi64x(8);
3213 let b = _mm_set1_epi64x(2);
3214 let c = _mm_set1_epi64x(1);
3215 let r = _mm_shrdv_epi64(a, b, c);
3216 let e = _mm_set1_epi64x(1);
3217 assert_eq_m128i(r, e);
3218 }
3219
3220 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3221 unsafe fn test_mm_mask_shrdv_epi64() {
3222 let a = _mm_set1_epi64x(8);
3223 let b = _mm_set1_epi64x(2);
3224 let c = _mm_set1_epi64x(1);
3225 let r = _mm_mask_shrdv_epi64(a, 0, b, c);
3226 assert_eq_m128i(r, a);
3227 let r = _mm_mask_shrdv_epi64(a, 0b00000011, b, c);
3228 let e = _mm_set1_epi64x(1);
3229 assert_eq_m128i(r, e);
3230 }
3231
3232 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3233 unsafe fn test_mm_maskz_shrdv_epi64() {
3234 let a = _mm_set1_epi64x(8);
3235 let b = _mm_set1_epi64x(2);
3236 let c = _mm_set1_epi64x(1);
3237 let r = _mm_maskz_shrdv_epi64(0, a, b, c);
3238 assert_eq_m128i(r, _mm_setzero_si128());
3239 let r = _mm_maskz_shrdv_epi64(0b00000011, a, b, c);
3240 let e = _mm_set1_epi64x(1);
3241 assert_eq_m128i(r, e);
3242 }
3243
3244 #[simd_test(enable = "avx512vbmi2")]
3245 unsafe fn test_mm512_shrdv_epi32() {
3246 let a = _mm512_set1_epi32(8);
3247 let b = _mm512_set1_epi32(2);
3248 let c = _mm512_set1_epi32(1);
3249 let r = _mm512_shrdv_epi32(a, b, c);
3250 let e = _mm512_set1_epi32(1);
3251 assert_eq_m512i(r, e);
3252 }
3253
3254 #[simd_test(enable = "avx512vbmi2")]
3255 unsafe fn test_mm512_mask_shrdv_epi32() {
3256 let a = _mm512_set1_epi32(8);
3257 let b = _mm512_set1_epi32(2);
3258 let c = _mm512_set1_epi32(1);
3259 let r = _mm512_mask_shrdv_epi32(a, 0, b, c);
3260 assert_eq_m512i(r, a);
3261 let r = _mm512_mask_shrdv_epi32(a, 0b11111111_11111111, b, c);
3262 let e = _mm512_set1_epi32(1);
3263 assert_eq_m512i(r, e);
3264 }
3265
3266 #[simd_test(enable = "avx512vbmi2")]
3267 unsafe fn test_mm512_maskz_shrdv_epi32() {
3268 let a = _mm512_set1_epi32(8);
3269 let b = _mm512_set1_epi32(2);
3270 let c = _mm512_set1_epi32(1);
3271 let r = _mm512_maskz_shrdv_epi32(0, a, b, c);
3272 assert_eq_m512i(r, _mm512_setzero_si512());
3273 let r = _mm512_maskz_shrdv_epi32(0b11111111_11111111, a, b, c);
3274 let e = _mm512_set1_epi32(1);
3275 assert_eq_m512i(r, e);
3276 }
3277
3278 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3279 unsafe fn test_mm256_shrdv_epi32() {
3280 let a = _mm256_set1_epi32(8);
3281 let b = _mm256_set1_epi32(2);
3282 let c = _mm256_set1_epi32(1);
3283 let r = _mm256_shrdv_epi32(a, b, c);
3284 let e = _mm256_set1_epi32(1);
3285 assert_eq_m256i(r, e);
3286 }
3287
3288 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3289 unsafe fn test_mm256_mask_shrdv_epi32() {
3290 let a = _mm256_set1_epi32(8);
3291 let b = _mm256_set1_epi32(2);
3292 let c = _mm256_set1_epi32(1);
3293 let r = _mm256_mask_shrdv_epi32(a, 0, b, c);
3294 assert_eq_m256i(r, a);
3295 let r = _mm256_mask_shrdv_epi32(a, 0b11111111, b, c);
3296 let e = _mm256_set1_epi32(1);
3297 assert_eq_m256i(r, e);
3298 }
3299
3300 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3301 unsafe fn test_mm256_maskz_shrdv_epi32() {
3302 let a = _mm256_set1_epi32(8);
3303 let b = _mm256_set1_epi32(2);
3304 let c = _mm256_set1_epi32(1);
3305 let r = _mm256_maskz_shrdv_epi32(0, a, b, c);
3306 assert_eq_m256i(r, _mm256_setzero_si256());
3307 let r = _mm256_maskz_shrdv_epi32(0b11111111, a, b, c);
3308 let e = _mm256_set1_epi32(1);
3309 assert_eq_m256i(r, e);
3310 }
3311
3312 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3313 unsafe fn test_mm_shrdv_epi32() {
3314 let a = _mm_set1_epi32(8);
3315 let b = _mm_set1_epi32(2);
3316 let c = _mm_set1_epi32(1);
3317 let r = _mm_shrdv_epi32(a, b, c);
3318 let e = _mm_set1_epi32(1);
3319 assert_eq_m128i(r, e);
3320 }
3321
3322 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3323 unsafe fn test_mm_mask_shrdv_epi32() {
3324 let a = _mm_set1_epi32(8);
3325 let b = _mm_set1_epi32(2);
3326 let c = _mm_set1_epi32(1);
3327 let r = _mm_mask_shrdv_epi32(a, 0, b, c);
3328 assert_eq_m128i(r, a);
3329 let r = _mm_mask_shrdv_epi32(a, 0b00001111, b, c);
3330 let e = _mm_set1_epi32(1);
3331 assert_eq_m128i(r, e);
3332 }
3333
3334 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3335 unsafe fn test_mm_maskz_shrdv_epi32() {
3336 let a = _mm_set1_epi32(8);
3337 let b = _mm_set1_epi32(2);
3338 let c = _mm_set1_epi32(1);
3339 let r = _mm_maskz_shrdv_epi32(0, a, b, c);
3340 assert_eq_m128i(r, _mm_setzero_si128());
3341 let r = _mm_maskz_shrdv_epi32(0b00001111, a, b, c);
3342 let e = _mm_set1_epi32(1);
3343 assert_eq_m128i(r, e);
3344 }
3345
3346 #[simd_test(enable = "avx512vbmi2")]
3347 unsafe fn test_mm512_shrdv_epi16() {
3348 let a = _mm512_set1_epi16(8);
3349 let b = _mm512_set1_epi16(2);
3350 let c = _mm512_set1_epi16(1);
3351 let r = _mm512_shrdv_epi16(a, b, c);
3352 let e = _mm512_set1_epi16(1);
3353 assert_eq_m512i(r, e);
3354 }
3355
3356 #[simd_test(enable = "avx512vbmi2")]
3357 unsafe fn test_mm512_mask_shrdv_epi16() {
3358 let a = _mm512_set1_epi16(8);
3359 let b = _mm512_set1_epi16(2);
3360 let c = _mm512_set1_epi16(1);
3361 let r = _mm512_mask_shrdv_epi16(a, 0, b, c);
3362 assert_eq_m512i(r, a);
3363 let r = _mm512_mask_shrdv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c);
3364 let e = _mm512_set1_epi16(1);
3365 assert_eq_m512i(r, e);
3366 }
3367
3368 #[simd_test(enable = "avx512vbmi2")]
3369 unsafe fn test_mm512_maskz_shrdv_epi16() {
3370 let a = _mm512_set1_epi16(8);
3371 let b = _mm512_set1_epi16(2);
3372 let c = _mm512_set1_epi16(1);
3373 let r = _mm512_maskz_shrdv_epi16(0, a, b, c);
3374 assert_eq_m512i(r, _mm512_setzero_si512());
3375 let r = _mm512_maskz_shrdv_epi16(0b11111111_11111111_11111111_11111111, a, b, c);
3376 let e = _mm512_set1_epi16(1);
3377 assert_eq_m512i(r, e);
3378 }
3379
3380 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3381 unsafe fn test_mm256_shrdv_epi16() {
3382 let a = _mm256_set1_epi16(8);
3383 let b = _mm256_set1_epi16(2);
3384 let c = _mm256_set1_epi16(1);
3385 let r = _mm256_shrdv_epi16(a, b, c);
3386 let e = _mm256_set1_epi16(1);
3387 assert_eq_m256i(r, e);
3388 }
3389
3390 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3391 unsafe fn test_mm256_mask_shrdv_epi16() {
3392 let a = _mm256_set1_epi16(8);
3393 let b = _mm256_set1_epi16(2);
3394 let c = _mm256_set1_epi16(1);
3395 let r = _mm256_mask_shrdv_epi16(a, 0, b, c);
3396 assert_eq_m256i(r, a);
3397 let r = _mm256_mask_shrdv_epi16(a, 0b11111111_11111111, b, c);
3398 let e = _mm256_set1_epi16(1);
3399 assert_eq_m256i(r, e);
3400 }
3401
3402 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3403 unsafe fn test_mm256_maskz_shrdv_epi16() {
3404 let a = _mm256_set1_epi16(8);
3405 let b = _mm256_set1_epi16(2);
3406 let c = _mm256_set1_epi16(1);
3407 let r = _mm256_maskz_shrdv_epi16(0, a, b, c);
3408 assert_eq_m256i(r, _mm256_setzero_si256());
3409 let r = _mm256_maskz_shrdv_epi16(0b11111111_11111111, a, b, c);
3410 let e = _mm256_set1_epi16(1);
3411 assert_eq_m256i(r, e);
3412 }
3413
3414 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3415 unsafe fn test_mm_shrdv_epi16() {
3416 let a = _mm_set1_epi16(8);
3417 let b = _mm_set1_epi16(2);
3418 let c = _mm_set1_epi16(1);
3419 let r = _mm_shrdv_epi16(a, b, c);
3420 let e = _mm_set1_epi16(1);
3421 assert_eq_m128i(r, e);
3422 }
3423
3424 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3425 unsafe fn test_mm_mask_shrdv_epi16() {
3426 let a = _mm_set1_epi16(8);
3427 let b = _mm_set1_epi16(2);
3428 let c = _mm_set1_epi16(1);
3429 let r = _mm_mask_shrdv_epi16(a, 0, b, c);
3430 assert_eq_m128i(r, a);
3431 let r = _mm_mask_shrdv_epi16(a, 0b11111111, b, c);
3432 let e = _mm_set1_epi16(1);
3433 assert_eq_m128i(r, e);
3434 }
3435
3436 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3437 unsafe fn test_mm_maskz_shrdv_epi16() {
3438 let a = _mm_set1_epi16(8);
3439 let b = _mm_set1_epi16(2);
3440 let c = _mm_set1_epi16(1);
3441 let r = _mm_maskz_shrdv_epi16(0, a, b, c);
3442 assert_eq_m128i(r, _mm_setzero_si128());
3443 let r = _mm_maskz_shrdv_epi16(0b11111111, a, b, c);
3444 let e = _mm_set1_epi16(1);
3445 assert_eq_m128i(r, e);
3446 }
3447
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shldi_epi64() {
        let a = _mm512_set1_epi64(1);
        let b = _mm512_set1_epi64(1 << 63);
        let r = _mm512_shldi_epi64::<2>(a, b);
        let e = _mm512_set1_epi64(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldi_epi64() {
        let a = _mm512_set1_epi64(1);
        let b = _mm512_set1_epi64(1 << 63);
        let r = _mm512_mask_shldi_epi64::<2>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldi_epi64::<2>(a, 0b11111111, a, b);
        let e = _mm512_set1_epi64(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldi_epi64() {
        let a = _mm512_set1_epi64(1);
        let b = _mm512_set1_epi64(1 << 63);
        let r = _mm512_maskz_shldi_epi64::<2>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldi_epi64::<2>(0b11111111, a, b);
        let e = _mm512_set1_epi64(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldi_epi64() {
        let a = _mm256_set1_epi64x(1);
        let b = _mm256_set1_epi64x(1 << 63);
        let r = _mm256_shldi_epi64::<2>(a, b);
        let e = _mm256_set1_epi64x(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldi_epi64() {
        let a = _mm256_set1_epi64x(1);
        let b = _mm256_set1_epi64x(1 << 63);
        let r = _mm256_mask_shldi_epi64::<2>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldi_epi64::<2>(a, 0b00001111, a, b);
        let e = _mm256_set1_epi64x(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldi_epi64() {
        let a = _mm256_set1_epi64x(1);
        let b = _mm256_set1_epi64x(1 << 63);
        let r = _mm256_maskz_shldi_epi64::<2>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldi_epi64::<2>(0b00001111, a, b);
        let e = _mm256_set1_epi64x(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldi_epi64() {
        let a = _mm_set1_epi64x(1);
        let b = _mm_set1_epi64x(1 << 63);
        let r = _mm_shldi_epi64::<2>(a, b);
        let e = _mm_set1_epi64x(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldi_epi64() {
        let a = _mm_set1_epi64x(1);
        let b = _mm_set1_epi64x(1 << 63);
        let r = _mm_mask_shldi_epi64::<2>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldi_epi64::<2>(a, 0b00000011, a, b);
        let e = _mm_set1_epi64x(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldi_epi64() {
        let a = _mm_set1_epi64x(1);
        let b = _mm_set1_epi64x(1 << 63);
        let r = _mm_maskz_shldi_epi64::<2>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldi_epi64::<2>(0b00000011, a, b);
        let e = _mm_set1_epi64x(6);
        assert_eq_m128i(r, e);
    }

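    // The 32-bit shldi variants repeat the pattern one lane width down:
    // (1 << 2) | ((1 << 31) >> 30) == 6.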
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shldi_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let r = _mm512_shldi_epi32::<2>(a, b);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldi_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let r = _mm512_mask_shldi_epi32::<2>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldi_epi32::<2>(a, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldi_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let r = _mm512_maskz_shldi_epi32::<2>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldi_epi32::<2>(0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldi_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let r = _mm256_shldi_epi32::<2>(a, b);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldi_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let r = _mm256_mask_shldi_epi32::<2>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldi_epi32::<2>(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldi_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let r = _mm256_maskz_shldi_epi32::<2>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldi_epi32::<2>(0b11111111, a, b);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldi_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let r = _mm_shldi_epi32::<2>(a, b);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldi_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let r = _mm_mask_shldi_epi32::<2>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldi_epi32::<2>(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldi_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let r = _mm_maskz_shldi_epi32::<2>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldi_epi32::<2>(0b00001111, a, b);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

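    // The 16-bit shldi variants: (1 << 2) | ((1 << 15) >> 14) == 6 in every selected lane.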
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shldi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let r = _mm512_shldi_epi16::<2>(a, b);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let r = _mm512_mask_shldi_epi16::<2>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldi_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let r = _mm512_maskz_shldi_epi16::<2>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldi_epi16::<2>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let r = _mm256_shldi_epi16::<2>(a, b);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let r = _mm256_mask_shldi_epi16::<2>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldi_epi16::<2>(a, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let r = _mm256_maskz_shldi_epi16::<2>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldi_epi16::<2>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let r = _mm_shldi_epi16::<2>(a, b);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let r = _mm_mask_shldi_epi16::<2>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldi_epi16::<2>(a, 0b11111111, a, b);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let r = _mm_maskz_shldi_epi16::<2>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldi_epi16::<2>(0b11111111, a, b);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

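    // The shrdi tests mirror the shldi tests with a right shift. As above, a mask of 0
    // must leave the destination equal to src (or all zeros in the maskz variants),
    // while an all-ones mask applies the shift to every lane.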
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdi_epi64() {
        let a = _mm512_set1_epi64(8);
        let b = _mm512_set1_epi64(2);
        let r = _mm512_shrdi_epi64::<1>(a, b);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdi_epi64() {
        let a = _mm512_set1_epi64(8);
        let b = _mm512_set1_epi64(2);
        let r = _mm512_mask_shrdi_epi64::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdi_epi64::<1>(a, 0b11111111, a, b);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdi_epi64() {
        let a = _mm512_set1_epi64(8);
        let b = _mm512_set1_epi64(2);
        let r = _mm512_maskz_shrdi_epi64::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdi_epi64::<1>(0b11111111, a, b);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdi_epi64() {
        let a = _mm256_set1_epi64x(8);
        let b = _mm256_set1_epi64x(2);
        let r = _mm256_shrdi_epi64::<1>(a, b);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdi_epi64() {
        let a = _mm256_set1_epi64x(8);
        let b = _mm256_set1_epi64x(2);
        let r = _mm256_mask_shrdi_epi64::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdi_epi64::<1>(a, 0b00001111, a, b);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdi_epi64() {
        let a = _mm256_set1_epi64x(8);
        let b = _mm256_set1_epi64x(2);
        let r = _mm256_maskz_shrdi_epi64::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdi_epi64::<1>(0b00001111, a, b);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdi_epi64() {
        let a = _mm_set1_epi64x(8);
        let b = _mm_set1_epi64x(2);
        let r = _mm_shrdi_epi64::<1>(a, b);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdi_epi64() {
        let a = _mm_set1_epi64x(8);
        let b = _mm_set1_epi64x(2);
        let r = _mm_mask_shrdi_epi64::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdi_epi64::<1>(a, 0b00000011, a, b);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdi_epi64() {
        let a = _mm_set1_epi64x(8);
        let b = _mm_set1_epi64x(2);
        let r = _mm_maskz_shrdi_epi64::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdi_epi64::<1>(0b00000011, a, b);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdi_epi32() {
        let a = _mm512_set1_epi32(8);
        let b = _mm512_set1_epi32(2);
        let r = _mm512_shrdi_epi32::<1>(a, b);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdi_epi32() {
        let a = _mm512_set1_epi32(8);
        let b = _mm512_set1_epi32(2);
        let r = _mm512_mask_shrdi_epi32::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdi_epi32::<1>(a, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdi_epi32() {
        let a = _mm512_set1_epi32(8);
        let b = _mm512_set1_epi32(2);
        let r = _mm512_maskz_shrdi_epi32::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdi_epi32::<1>(0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdi_epi32() {
        let a = _mm256_set1_epi32(8);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_shrdi_epi32::<1>(a, b);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdi_epi32() {
        let a = _mm256_set1_epi32(8);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_mask_shrdi_epi32::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdi_epi32::<1>(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdi_epi32() {
        let a = _mm256_set1_epi32(8);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_maskz_shrdi_epi32::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdi_epi32::<1>(0b11111111, a, b);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdi_epi32() {
        let a = _mm_set1_epi32(8);
        let b = _mm_set1_epi32(2);
        let r = _mm_shrdi_epi32::<1>(a, b);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdi_epi32() {
        let a = _mm_set1_epi32(8);
        let b = _mm_set1_epi32(2);
        let r = _mm_mask_shrdi_epi32::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdi_epi32::<1>(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdi_epi32() {
        let a = _mm_set1_epi32(8);
        let b = _mm_set1_epi32(2);
        let r = _mm_maskz_shrdi_epi32::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdi_epi32::<1>(0b00001111, a, b);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdi_epi16() {
        let a = _mm512_set1_epi16(8);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_shrdi_epi16::<1>(a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdi_epi16() {
        let a = _mm512_set1_epi16(8);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_shrdi_epi16::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdi_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdi_epi16() {
        let a = _mm512_set1_epi16(8);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_maskz_shrdi_epi16::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdi_epi16::<1>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdi_epi16() {
        let a = _mm256_set1_epi16(8);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_shrdi_epi16::<1>(a, b);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdi_epi16() {
        let a = _mm256_set1_epi16(8);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_shrdi_epi16::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdi_epi16::<1>(a, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdi_epi16() {
        let a = _mm256_set1_epi16(8);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_maskz_shrdi_epi16::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdi_epi16::<1>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdi_epi16() {
        let a = _mm_set1_epi16(8);
        let b = _mm_set1_epi16(2);
        let r = _mm_shrdi_epi16::<1>(a, b);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdi_epi16() {
        let a = _mm_set1_epi16(8);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_shrdi_epi16::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdi_epi16::<1>(a, 0b11111111, a, b);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdi_epi16() {
        let a = _mm_set1_epi16(8);
        let b = _mm_set1_epi16(2);
        let r = _mm_maskz_shrdi_epi16::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdi_epi16::<1>(0b11111111, a, b);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

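    // Expand-load tests: each set bit in the mask consumes the next contiguous element
    // from memory, while cleared positions take the corresponding lane of src (mask
    // variants) or zero (maskz variants). The expected vectors are built with the
    // _mm512_set_epi16-style constructors, which list lanes from the highest index
    // down, so they read back-to-front relative to the source slice.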
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_expandloadu_epi16() {
        let src = _mm512_set1_epi16(42);
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm512_mask_expandloadu_epi16(src, m, black_box(p));
        let e = _mm512_set_epi16(
            16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
            42, 42, 42, 42, 42, 4, 3, 2, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_expandloadu_epi16() {
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm512_maskz_expandloadu_epi16(m, black_box(p));
        let e = _mm512_set_epi16(
            16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
            0, 4, 3, 2, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi16() {
        let src = _mm256_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_mask_expandloadu_epi16(src, m, black_box(p));
        let e = _mm256_set_epi16(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_maskz_expandloadu_epi16(m, black_box(p));
        let e = _mm256_set_epi16(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi16() {
        let src = _mm_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_epi16(src, m, black_box(p));
        let e = _mm_set_epi16(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_epi16(m, black_box(p));
        let e = _mm_set_epi16(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

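    // The epi8 expand-load tests follow the same rule with byte lanes; the 512-bit
    // variants take a 64-bit (__mmask64) element mask.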
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_expandloadu_epi8() {
        let src = _mm512_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
        let r = _mm512_mask_expandloadu_epi8(src, m, black_box(p));
        let e = _mm512_set_epi8(
            32, 31, 30, 42, 29, 42, 42, 42, 28, 27, 42, 42, 26, 42, 25, 42, 24, 23, 22, 21, 42, 42,
            42, 42, 42, 42, 42, 42, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 42, 42, 42, 42,
            42, 42, 42, 42, 8, 42, 7, 42, 6, 42, 5, 42, 42, 4, 42, 3, 42, 2, 42, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_expandloadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
        let r = _mm512_maskz_expandloadu_epi8(m, black_box(p));
        let e = _mm512_set_epi8(
            32, 31, 30, 0, 29, 0, 0, 0, 28, 27, 0, 0, 26, 0, 25, 0, 24, 23, 22, 21, 0, 0, 0, 0, 0,
            0, 0, 0, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0,
            7, 0, 6, 0, 5, 0, 0, 4, 0, 3, 0, 2, 0, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi8() {
        let src = _mm256_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm256_mask_expandloadu_epi8(src, m, black_box(p));
        let e = _mm256_set_epi8(
            16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
            42, 42, 42, 42, 42, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm256_maskz_expandloadu_epi8(m, black_box(p));
        let e = _mm256_set_epi8(
            16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
            0, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi8() {
        let src = _mm_set1_epi8(42);
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_mask_expandloadu_epi8(src, m, black_box(p));
        let e = _mm_set_epi8(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi8() {
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_maskz_expandloadu_epi8(m, black_box(p));
        let e = _mm_set_epi8(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
        assert_eq_m128i(r, e);
    }

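    // Compress-store tests: lanes selected by the mask are written contiguously to
    // memory starting at the destination pointer; the tail of the buffer is not
    // written, so it remains zero here.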
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_compressstoreu_epi16() {
        let a = _mm512_set_epi16(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
            10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        );
        let mut r = [0_i16; 32];
        _mm512_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i16; 32]);
        _mm512_mask_compressstoreu_epi16(
            r.as_mut_ptr() as *mut _,
            0b11110000_11001010_11111111_00000000,
            a,
        );
        assert_eq!(
            &r,
            &[
                9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0
            ]
        );
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_compressstoreu_epi16() {
        let a = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let mut r = [0_i16; 16];
        _mm256_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i16; 16]);
        _mm256_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0b11110000_11001010, a);
        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_compressstoreu_epi16() {
        let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
        let mut r = [0_i16; 8];
        _mm_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i16; 8]);
        _mm_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0b11110000, a);
        assert_eq!(&r, &[5, 6, 7, 8, 0, 0, 0, 0]);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_compressstoreu_epi8() {
        let a = _mm512_set_epi8(
            64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43,
            42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21,
            20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        );
        let mut r = [0_i8; 64];
        _mm512_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i8; 64]);
        _mm512_mask_compressstoreu_epi8(
            r.as_mut_ptr() as *mut _,
            0b11110000_11001010_11111111_00000000_10101010_01010101_11110000_00001111,
            a,
        );
        assert_eq!(
            &r,
            &[
                1, 2, 3, 4, 13, 14, 15, 16, 17, 19, 21, 23, 26, 28, 30, 32, 41, 42, 43, 44, 45, 46,
                47, 48, 50, 52, 55, 56, 61, 62, 63, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
            ]
        );
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_compressstoreu_epi8() {
        let a = _mm256_set_epi8(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
            10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        );
        let mut r = [0_i8; 32];
        _mm256_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i8; 32]);
        _mm256_mask_compressstoreu_epi8(
            r.as_mut_ptr() as *mut _,
            0b11110000_11001010_11111111_00000000,
            a,
        );
        assert_eq!(
            &r,
            &[
                9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0
            ]
        );
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_compressstoreu_epi8() {
        let a = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let mut r = [0_i8; 16];
        _mm_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i8; 16]);
        _mm_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0b11110000_11001010, a);
        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
    }
}