| 1 | use crate::{ |
| 2 | core_arch::{simd::*, x86::*}, |
| 3 | intrinsics::simd::*, |
| 4 | }; |
| 5 | |
| 6 | #[cfg(test)] |
| 7 | use stdarch_test::assert_instr; |
| 8 | |
| 9 | /// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 10 | /// |
| 11 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi16) |
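| /// |
| /// An illustrative sketch (not part of the original source); it assumes |
| /// `avx512vbmi2` (and `avx512bw` for the helper intrinsics) has already been |
| /// detected at runtime, e.g. with `is_x86_feature_detected!`: |
| /// |
| /// ```ignore |
| /// unsafe { |
| ///     let src = _mm512_set1_epi16(-1); |
| ///     let mem: [i16; 32] = core::array::from_fn(|i| i as i16); |
| ///     // Mask 0b0101: lane 0 receives mem[0] and lane 2 receives mem[1]; |
| ///     // every other lane keeps its value from `src` (-1). |
| ///     let r = _mm512_mask_expandloadu_epi16(src, 0b0101, mem.as_ptr()); |
| /// } |
| /// ``` |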
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[cfg_attr(test, assert_instr(vpexpandw))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _mm512_mask_expandloadu_epi16( |
|     src: __m512i, |
|     k: __mmask32, |
|     mem_addr: *const i16, |
| ) -> __m512i { |
|     transmute(expandloadw_512(mem_addr, src.as_i16x32(), k)) |
| } |
| 23 | |
| 24 | /// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 25 | /// |
| 26 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi16) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[cfg_attr(test, assert_instr(vpexpandw))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _mm512_maskz_expandloadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i { |
|     _mm512_mask_expandloadu_epi16(_mm512_setzero_si512(), k, mem_addr) |
| } |
| 34 | |
| 35 | /// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 36 | /// |
| 37 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi16) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpexpandw))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _mm256_mask_expandloadu_epi16( |
|     src: __m256i, |
|     k: __mmask16, |
|     mem_addr: *const i16, |
| ) -> __m256i { |
|     transmute(expandloadw_256(mem_addr, src.as_i16x16(), k)) |
| } |
| 49 | |
| 50 | /// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 51 | /// |
| 52 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi16) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpexpandw))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _mm256_maskz_expandloadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i { |
|     _mm256_mask_expandloadu_epi16(_mm256_setzero_si256(), k, mem_addr) |
| } |
| 60 | |
| 61 | /// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 62 | /// |
| 63 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi16) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpexpandw))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _mm_mask_expandloadu_epi16( |
|     src: __m128i, |
|     k: __mmask8, |
|     mem_addr: *const i16, |
| ) -> __m128i { |
|     transmute(expandloadw_128(mem_addr, src.as_i16x8(), k)) |
| } |
| 75 | |
| 76 | /// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 77 | /// |
| 78 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi16) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpexpandw))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _mm_maskz_expandloadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i { |
|     _mm_mask_expandloadu_epi16(_mm_setzero_si128(), k, mem_addr) |
| } |
| 86 | |
| 87 | /// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 88 | /// |
| 89 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi8) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[cfg_attr(test, assert_instr(vpexpandb))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _mm512_mask_expandloadu_epi8( |
|     src: __m512i, |
|     k: __mmask64, |
|     mem_addr: *const i8, |
| ) -> __m512i { |
|     transmute(expandloadb_512(mem_addr, src.as_i8x64(), k)) |
| } |
| 101 | |
| 102 | /// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 103 | /// |
| 104 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi8) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[cfg_attr(test, assert_instr(vpexpandb))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _mm512_maskz_expandloadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i { |
|     _mm512_mask_expandloadu_epi8(_mm512_setzero_si512(), k, mem_addr) |
| } |
| 112 | |
| 113 | /// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 114 | /// |
| 115 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi8) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpexpandb))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _mm256_mask_expandloadu_epi8( |
|     src: __m256i, |
|     k: __mmask32, |
|     mem_addr: *const i8, |
| ) -> __m256i { |
|     transmute(expandloadb_256(mem_addr, src.as_i8x32(), k)) |
| } |
| 127 | |
| 128 | /// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 129 | /// |
| 130 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi8) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpexpandb))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _mm256_maskz_expandloadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i { |
|     _mm256_mask_expandloadu_epi8(_mm256_setzero_si256(), k, mem_addr) |
| } |
| 138 | |
| 139 | /// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 140 | /// |
| 141 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi8) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpexpandb))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _mm_mask_expandloadu_epi8( |
|     src: __m128i, |
|     k: __mmask16, |
|     mem_addr: *const i8, |
| ) -> __m128i { |
|     transmute(expandloadb_128(mem_addr, src.as_i8x16(), k)) |
| } |
| 153 | |
| 154 | /// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 155 | /// |
| 156 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi8) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpexpandb))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _mm_maskz_expandloadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i { |
|     _mm_mask_expandloadu_epi8(_mm_setzero_si128(), k, mem_addr) |
| } |
| 164 | |
| 165 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
| 166 | /// |
| 167 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi16) |
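| /// |
| /// An illustrative sketch (not part of the original source); it assumes the |
| /// required CPU features have been detected at runtime: |
| /// |
| /// ```ignore |
| /// unsafe { |
| ///     let vals: [i16; 32] = core::array::from_fn(|i| i as i16); |
| ///     let a = _mm512_loadu_epi16(vals.as_ptr()); |
| ///     let mut out = [0i16; 32]; |
| ///     // Mask 0b1010: lanes 1 and 3 are active, so out[0] == 1 and |
| ///     // out[1] == 3 after the call; the remaining elements of `out` |
| ///     // are not written. |
| ///     _mm512_mask_compressstoreu_epi16(out.as_mut_ptr(), 0b1010, a); |
| /// } |
| /// ``` |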
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpcompressw))] |
| pub unsafe fn _mm512_mask_compressstoreu_epi16(base_addr: *mut i16, k: __mmask32, a: __m512i) { |
|     vcompressstorew(base_addr as *mut _, a.as_i16x32(), k) |
| } |
| 175 | |
| 176 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
| 177 | /// |
| 178 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi16) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpcompressw))] |
| pub unsafe fn _mm256_mask_compressstoreu_epi16(base_addr: *mut i16, k: __mmask16, a: __m256i) { |
|     vcompressstorew256(base_addr as *mut _, a.as_i16x16(), k) |
| } |
| 186 | |
| 187 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
| 188 | /// |
| 189 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi16) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpcompressw))] |
| pub unsafe fn _mm_mask_compressstoreu_epi16(base_addr: *mut i16, k: __mmask8, a: __m128i) { |
|     vcompressstorew128(base_addr as *mut _, a.as_i16x8(), k) |
| } |
| 197 | |
| 198 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
| 199 | /// |
| 200 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi8) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpcompressb))] |
| pub unsafe fn _mm512_mask_compressstoreu_epi8(base_addr: *mut i8, k: __mmask64, a: __m512i) { |
|     vcompressstoreb(base_addr, a.as_i8x64(), k) |
| } |
| 208 | |
| 209 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
| 210 | /// |
| 211 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi8) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpcompressb))] |
| pub unsafe fn _mm256_mask_compressstoreu_epi8(base_addr: *mut i8, k: __mmask32, a: __m256i) { |
|     vcompressstoreb256(base_addr, a.as_i8x32(), k) |
| } |
| 219 | |
| 220 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
| 221 | /// |
| 222 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi8) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpcompressb))] |
| pub unsafe fn _mm_mask_compressstoreu_epi8(base_addr: *mut i8, k: __mmask16, a: __m128i) { |
|     vcompressstoreb128(base_addr, a.as_i8x16(), k) |
| } |
| 230 | |
| 231 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. |
| 232 | /// |
| 233 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi16&expand=1192) |
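| /// |
| /// An illustrative sketch (not part of the original source); it assumes the |
| /// required CPU features have been detected at runtime: |
| /// |
| /// ```ignore |
| /// unsafe { |
| ///     let vals: [i16; 32] = core::array::from_fn(|i| i as i16); |
| ///     let a = _mm512_loadu_epi16(vals.as_ptr()); |
| ///     let src = _mm512_set1_epi16(-1); |
| ///     // Active lanes 1 and 3 are packed into lanes 0 and 1 of the result |
| ///     // (values 1 and 3); lanes 2..=31 are passed through from `src`. |
| ///     let r = _mm512_mask_compress_epi16(src, 0b1010, a); |
| /// } |
| /// ``` |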
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpcompressw))] |
| pub fn _mm512_mask_compress_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { |
|     unsafe { transmute(vpcompressw(a.as_i16x32(), src.as_i16x32(), k)) } |
| } |
| 241 | |
| 242 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. |
| 243 | /// |
| 244 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi16&expand=1193) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpcompressw))] |
| pub fn _mm512_maskz_compress_epi16(k: __mmask32, a: __m512i) -> __m512i { |
|     unsafe { transmute(vpcompressw(a.as_i16x32(), i16x32::ZERO, k)) } |
| } |
| 252 | |
| 253 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. |
| 254 | /// |
| 255 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi16&expand=1190) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpcompressw))] |
| pub fn _mm256_mask_compress_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { |
|     unsafe { transmute(vpcompressw256(a.as_i16x16(), src.as_i16x16(), k)) } |
| } |
| 263 | |
| 264 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. |
| 265 | /// |
| 266 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi16&expand=1191) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpcompressw))] |
| pub fn _mm256_maskz_compress_epi16(k: __mmask16, a: __m256i) -> __m256i { |
|     unsafe { transmute(vpcompressw256(a.as_i16x16(), i16x16::ZERO, k)) } |
| } |
| 274 | |
| 275 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. |
| 276 | /// |
| 277 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi16&expand=1188) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpcompressw))] |
| pub fn _mm_mask_compress_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
|     unsafe { transmute(vpcompressw128(a.as_i16x8(), src.as_i16x8(), k)) } |
| } |
| 285 | |
| 286 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. |
| 287 | /// |
| 288 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi16&expand=1189) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpcompressw))] |
| pub fn _mm_maskz_compress_epi16(k: __mmask8, a: __m128i) -> __m128i { |
|     unsafe { transmute(vpcompressw128(a.as_i16x8(), i16x8::ZERO, k)) } |
| } |
| 296 | |
| 297 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. |
| 298 | /// |
| 299 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi8&expand=1210) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpcompressb))] |
| pub fn _mm512_mask_compress_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { |
|     unsafe { transmute(vpcompressb(a.as_i8x64(), src.as_i8x64(), k)) } |
| } |
| 307 | |
| 308 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. |
| 309 | /// |
| 310 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi8&expand=1211) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpcompressb))] |
| pub fn _mm512_maskz_compress_epi8(k: __mmask64, a: __m512i) -> __m512i { |
|     unsafe { transmute(vpcompressb(a.as_i8x64(), i8x64::ZERO, k)) } |
| } |
| 318 | |
| 319 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. |
| 320 | /// |
| 321 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi8&expand=1208) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpcompressb))] |
| pub fn _mm256_mask_compress_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { |
|     unsafe { transmute(vpcompressb256(a.as_i8x32(), src.as_i8x32(), k)) } |
| } |
| 329 | |
| 330 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. |
| 331 | /// |
| 332 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi8&expand=1209) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpcompressb))] |
| pub fn _mm256_maskz_compress_epi8(k: __mmask32, a: __m256i) -> __m256i { |
|     unsafe { transmute(vpcompressb256(a.as_i8x32(), i8x32::ZERO, k)) } |
| } |
| 340 | |
| 341 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. |
| 342 | /// |
| 343 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi8&expand=1206) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpcompressb))] |
| pub fn _mm_mask_compress_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { |
|     unsafe { transmute(vpcompressb128(a.as_i8x16(), src.as_i8x16(), k)) } |
| } |
| 351 | |
| 352 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. |
| 353 | /// |
| 354 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi8&expand=1207) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpcompressb))] |
| pub fn _mm_maskz_compress_epi8(k: __mmask16, a: __m128i) -> __m128i { |
|     unsafe { transmute(vpcompressb128(a.as_i8x16(), i8x16::ZERO, k)) } |
| } |
| 362 | |
| 363 | /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 364 | /// |
| 365 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi16&expand=2310) |
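| /// |
| /// An illustrative sketch (not part of the original source); it assumes the |
| /// required CPU features have been detected at runtime: |
| /// |
| /// ```ignore |
| /// unsafe { |
| ///     let vals: [i16; 32] = core::array::from_fn(|i| i as i16); |
| ///     let a = _mm512_loadu_epi16(vals.as_ptr()); |
| ///     let src = _mm512_set1_epi16(-1); |
| ///     // The lowest elements of `a` are scattered into the lanes selected |
| ///     // by the mask: lane 0 gets a[0] and lane 2 gets a[1]; all other |
| ///     // lanes keep their value from `src` (-1). |
| ///     let r = _mm512_mask_expand_epi16(src, 0b0101, a); |
| /// } |
| /// ``` |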
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpexpandw))] |
| pub fn _mm512_mask_expand_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { |
|     unsafe { transmute(vpexpandw(a.as_i16x32(), src.as_i16x32(), k)) } |
| } |
| 373 | |
| 374 | /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 375 | /// |
| 376 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi16&expand=2311) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpexpandw))] |
| pub fn _mm512_maskz_expand_epi16(k: __mmask32, a: __m512i) -> __m512i { |
|     unsafe { transmute(vpexpandw(a.as_i16x32(), i16x32::ZERO, k)) } |
| } |
| 384 | |
| 385 | /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 386 | /// |
| 387 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi16&expand=2308) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpexpandw))] |
| pub fn _mm256_mask_expand_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { |
|     unsafe { transmute(vpexpandw256(a.as_i16x16(), src.as_i16x16(), k)) } |
| } |
| 395 | |
| 396 | /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 397 | /// |
| 398 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi16&expand=2309) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpexpandw))] |
| pub fn _mm256_maskz_expand_epi16(k: __mmask16, a: __m256i) -> __m256i { |
|     unsafe { transmute(vpexpandw256(a.as_i16x16(), i16x16::ZERO, k)) } |
| } |
| 406 | |
| 407 | /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 408 | /// |
| 409 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi16&expand=2306) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpexpandw))] |
| pub fn _mm_mask_expand_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
|     unsafe { transmute(vpexpandw128(a.as_i16x8(), src.as_i16x8(), k)) } |
| } |
| 417 | |
| 418 | /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 419 | /// |
| 420 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi16&expand=2307) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpexpandw))] |
| pub fn _mm_maskz_expand_epi16(k: __mmask8, a: __m128i) -> __m128i { |
|     unsafe { transmute(vpexpandw128(a.as_i16x8(), i16x8::ZERO, k)) } |
| } |
| 428 | |
| 429 | /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 430 | /// |
| 431 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi8&expand=2328) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpexpandb))] |
| pub fn _mm512_mask_expand_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { |
|     unsafe { transmute(vpexpandb(a.as_i8x64(), src.as_i8x64(), k)) } |
| } |
| 439 | |
| 440 | /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 441 | /// |
| 442 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi8&expand=2329) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpexpandb))] |
| pub fn _mm512_maskz_expand_epi8(k: __mmask64, a: __m512i) -> __m512i { |
|     unsafe { transmute(vpexpandb(a.as_i8x64(), i8x64::ZERO, k)) } |
| } |
| 450 | |
| 451 | /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 452 | /// |
| 453 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi8&expand=2326) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpexpandb))] |
| pub fn _mm256_mask_expand_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { |
|     unsafe { transmute(vpexpandb256(a.as_i8x32(), src.as_i8x32(), k)) } |
| } |
| 461 | |
| 462 | /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 463 | /// |
| 464 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi8&expand=2327) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpexpandb))] |
| pub fn _mm256_maskz_expand_epi8(k: __mmask32, a: __m256i) -> __m256i { |
|     unsafe { transmute(vpexpandb256(a.as_i8x32(), i8x32::ZERO, k)) } |
| } |
| 472 | |
| 473 | /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 474 | /// |
| 475 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi8&expand=2324) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpexpandb))] |
| pub fn _mm_mask_expand_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { |
|     unsafe { transmute(vpexpandb128(a.as_i8x16(), src.as_i8x16(), k)) } |
| } |
| 483 | |
| 484 | /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 485 | /// |
| 486 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi8&expand=2325) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpexpandb))] |
| pub fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i { |
|     unsafe { transmute(vpexpandb128(a.as_i8x16(), i8x16::ZERO, k)) } |
| } |
| 494 | |
| 495 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst. |
| 496 | /// |
| 497 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi64&expand=5087) |
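| /// |
| /// An illustrative sketch (not part of the original source); it assumes the |
| /// required CPU features have been detected at runtime: |
| /// |
| /// ```ignore |
| /// unsafe { |
| ///     let a = _mm512_set1_epi64(1); |
| ///     let b = _mm512_set1_epi64(i64::MIN); // only the top bit set |
| ///     let c = _mm512_set1_epi64(4); |
| ///     // Each lane takes the upper 64 bits of (a:b) << 4: `a` is shifted |
| ///     // left by 4 and the top 4 bits of `b` are shifted in at the bottom, |
| ///     // so every lane becomes (1 << 4) | 0b1000 == 0x18. |
| ///     let r = _mm512_shldv_epi64(a, b, c); |
| /// } |
| /// ``` |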
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvq))] |
| pub fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
|     unsafe { |
|         transmute(simd_funnel_shl( |
|             a.as_i64x8(), |
|             b.as_i64x8(), |
|             simd_and(c.as_i64x8(), i64x8::splat(63)), |
|         )) |
|     } |
| } |
| 511 | |
| 512 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
| 513 | /// |
| 514 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi64&expand=5085) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvq))] |
| pub fn _mm512_mask_shldv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i { |
|     unsafe { |
|         let shf: i64x8 = _mm512_shldv_epi64(a, b, c).as_i64x8(); |
|         transmute(simd_select_bitmask(k, shf, a.as_i64x8())) |
|     } |
| } |
| 525 | |
| 526 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 527 | /// |
| 528 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi64&expand=5086) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvq))] |
| pub fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
|     unsafe { |
|         let shf: i64x8 = _mm512_shldv_epi64(a, b, c).as_i64x8(); |
|         transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) |
|     } |
| } |
| 539 | |
| 540 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst. |
| 541 | /// |
| 542 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi64&expand=5084) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvq))] |
| pub fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
|     unsafe { |
|         transmute(simd_funnel_shl( |
|             a.as_i64x4(), |
|             b.as_i64x4(), |
|             simd_and(c.as_i64x4(), i64x4::splat(63)), |
|         )) |
|     } |
| } |
| 556 | |
| 557 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
| 558 | /// |
| 559 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi64&expand=5082) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvq))] |
| pub fn _mm256_mask_shldv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { |
|     unsafe { |
|         let shf: i64x4 = _mm256_shldv_epi64(a, b, c).as_i64x4(); |
|         transmute(simd_select_bitmask(k, shf, a.as_i64x4())) |
|     } |
| } |
| 570 | |
| 571 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 572 | /// |
| 573 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi64&expand=5083) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvq))] |
| pub fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
|     unsafe { |
|         let shf: i64x4 = _mm256_shldv_epi64(a, b, c).as_i64x4(); |
|         transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) |
|     } |
| } |
| 584 | |
| 585 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst. |
| 586 | /// |
| 587 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi64&expand=5081) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvq))] |
| pub fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
|     unsafe { |
|         transmute(simd_funnel_shl( |
|             a.as_i64x2(), |
|             b.as_i64x2(), |
|             simd_and(c.as_i64x2(), i64x2::splat(63)), |
|         )) |
|     } |
| } |
| 601 | |
| 602 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
| 603 | /// |
| 604 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi64&expand=5079) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvq))] |
| pub fn _mm_mask_shldv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { |
|     unsafe { |
|         let shf: i64x2 = _mm_shldv_epi64(a, b, c).as_i64x2(); |
|         transmute(simd_select_bitmask(k, shf, a.as_i64x2())) |
|     } |
| } |
| 615 | |
| 616 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 617 | /// |
| 618 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi64&expand=5080) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvq))] |
| pub fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
|     unsafe { |
|         let shf: i64x2 = _mm_shldv_epi64(a, b, c).as_i64x2(); |
|         transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) |
|     } |
| } |
| 629 | |
| 630 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst. |
| 631 | /// |
| 632 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi32&expand=5078) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvd))] |
| pub fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
|     unsafe { |
|         transmute(simd_funnel_shl( |
|             a.as_i32x16(), |
|             b.as_i32x16(), |
|             simd_and(c.as_i32x16(), i32x16::splat(31)), |
|         )) |
|     } |
| } |
| 646 | |
| 647 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
| 648 | /// |
| 649 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi32&expand=5076) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvd))] |
| pub fn _mm512_mask_shldv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i { |
|     unsafe { |
|         let shf: i32x16 = _mm512_shldv_epi32(a, b, c).as_i32x16(); |
|         transmute(simd_select_bitmask(k, shf, a.as_i32x16())) |
|     } |
| } |
| 660 | |
| 661 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 662 | /// |
| 663 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi32&expand=5077) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvd))] |
| pub fn _mm512_maskz_shldv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
|     unsafe { |
|         let shf: i32x16 = _mm512_shldv_epi32(a, b, c).as_i32x16(); |
|         transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) |
|     } |
| } |
| 674 | |
| 675 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst. |
| 676 | /// |
| 677 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi32&expand=5075) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvd))] |
| pub fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
|     unsafe { |
|         transmute(simd_funnel_shl( |
|             a.as_i32x8(), |
|             b.as_i32x8(), |
|             simd_and(c.as_i32x8(), i32x8::splat(31)), |
|         )) |
|     } |
| } |
| 691 | |
| 692 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
| 693 | /// |
| 694 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi32&expand=5073) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvd))] |
| pub fn _mm256_mask_shldv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { |
|     unsafe { |
|         let shf: i32x8 = _mm256_shldv_epi32(a, b, c).as_i32x8(); |
|         transmute(simd_select_bitmask(k, shf, a.as_i32x8())) |
|     } |
| } |
| 705 | |
| 706 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 707 | /// |
| 708 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi32&expand=5074) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvd))] |
| pub fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
|     unsafe { |
|         let shf: i32x8 = _mm256_shldv_epi32(a, b, c).as_i32x8(); |
|         transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) |
|     } |
| } |
| 719 | |
| 720 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst. |
| 721 | /// |
| 722 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi32&expand=5072) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvd))] |
| pub fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
|     unsafe { |
|         transmute(simd_funnel_shl( |
|             a.as_i32x4(), |
|             b.as_i32x4(), |
|             simd_and(c.as_i32x4(), i32x4::splat(31)), |
|         )) |
|     } |
| } |
| 736 | |
| 737 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
| 738 | /// |
| 739 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi32&expand=5070) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvd))] |
| pub fn _mm_mask_shldv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { |
|     unsafe { |
|         let shf: i32x4 = _mm_shldv_epi32(a, b, c).as_i32x4(); |
|         transmute(simd_select_bitmask(k, shf, a.as_i32x4())) |
|     } |
| } |
| 750 | |
| 751 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 752 | /// |
| 753 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi32&expand=5071) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvd))] |
| pub fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
|     unsafe { |
|         let shf: i32x4 = _mm_shldv_epi32(a, b, c).as_i32x4(); |
|         transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) |
|     } |
| } |
| 764 | |
| 765 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst. |
| 766 | /// |
| 767 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi16&expand=5069) |
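| /// |
| /// An illustrative sketch (not part of the original source) of the 16-bit |
| /// variant; it assumes the required CPU features have been detected at |
| /// runtime: |
| /// |
| /// ```ignore |
| /// unsafe { |
| ///     let a = _mm512_set1_epi16(1); |
| ///     let b = _mm512_set1_epi16(i16::MIN); // only the top bit set |
| ///     let c = _mm512_set1_epi16(4); |
| ///     // Each lane takes the upper 16 bits of (a:b) << 4, i.e. |
| ///     // (1 << 4) | 0b1000 == 0x18 in every lane. |
| ///     let r = _mm512_shldv_epi16(a, b, c); |
| /// } |
| /// ``` |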
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvw))] |
| pub fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
|     unsafe { |
|         transmute(simd_funnel_shl( |
|             a.as_i16x32(), |
|             b.as_i16x32(), |
|             simd_and(c.as_i16x32(), i16x32::splat(15)), |
|         )) |
|     } |
| } |
| 781 | |
| 782 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
| 783 | /// |
| 784 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi16&expand=5067) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvw))] |
| pub fn _mm512_mask_shldv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i { |
|     unsafe { |
|         let shf: i16x32 = _mm512_shldv_epi16(a, b, c).as_i16x32(); |
|         transmute(simd_select_bitmask(k, shf, a.as_i16x32())) |
|     } |
| } |
| 795 | |
| 796 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 797 | /// |
| 798 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi16&expand=5068) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvw))] |
| pub fn _mm512_maskz_shldv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
|     unsafe { |
|         let shf: i16x32 = _mm512_shldv_epi16(a, b, c).as_i16x32(); |
|         transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) |
|     } |
| } |
| 809 | |
| 810 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst. |
| 811 | /// |
| 812 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi16&expand=5066) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvw))] |
| pub fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
|     unsafe { |
|         transmute(simd_funnel_shl( |
|             a.as_i16x16(), |
|             b.as_i16x16(), |
|             simd_and(c.as_i16x16(), i16x16::splat(15)), |
|         )) |
|     } |
| } |
| 826 | |
| 827 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
| 828 | /// |
| 829 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi16&expand=5064) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvw))] |
| pub fn _mm256_mask_shldv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i { |
|     unsafe { |
|         let shf: i16x16 = _mm256_shldv_epi16(a, b, c).as_i16x16(); |
|         transmute(simd_select_bitmask(k, shf, a.as_i16x16())) |
|     } |
| } |
| 840 | |
| 841 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 842 | /// |
| 843 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi16&expand=5065) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvw))] |
| pub fn _mm256_maskz_shldv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
|     unsafe { |
|         let shf: i16x16 = _mm256_shldv_epi16(a, b, c).as_i16x16(); |
|         transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) |
|     } |
| } |
| 854 | |
| 855 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst. |
| 856 | /// |
| 857 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi16&expand=5063) |
| #[inline] |
| #[target_feature(enable = "avx512vbmi2,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| #[cfg_attr(test, assert_instr(vpshldvw))] |
| pub fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
|     unsafe { |
|         transmute(simd_funnel_shl( |
|             a.as_i16x8(), |
|             b.as_i16x8(), |
|             simd_and(c.as_i16x8(), i16x8::splat(15)), |
|         )) |
|     } |
| } |
| 871 | |
| 872 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
| 873 | /// |
| 874 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi16&expand=5061) |
| 875 | #[inline ] |
| 876 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 877 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 878 | #[cfg_attr (test, assert_instr(vpshldvw))] |
| 879 | pub fn _mm_mask_shldv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { |
| 880 | unsafe { |
| 881 | let shf: i16x8 = _mm_shldv_epi16(a, b, c).as_i16x8(); |
| 882 | transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
| 883 | } |
| 884 | } |
| 885 | |
| 886 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 887 | /// |
| 888 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi16&expand=5062) |
| 889 | #[inline ] |
| 890 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 891 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 892 | #[cfg_attr (test, assert_instr(vpshldvw))] |
| 893 | pub fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
| 894 | unsafe { |
| 895 | let shf: i16x8 = _mm_shldv_epi16(a, b, c).as_i16x8(); |
| 896 | transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
| 897 | } |
| 898 | } |
| 899 | |
| 900 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst. |
| 901 | /// |
| 902 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi64&expand=5141) |
| 903 | #[inline ] |
| 904 | #[target_feature (enable = "avx512vbmi2" )] |
| 905 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 906 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
| 907 | pub fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
| 908 | unsafe { |
| 909 | transmute(simd_funnel_shr(
| 910 | b.as_i64x8(),
| 911 | a.as_i64x8(),
| 912 | simd_and(c.as_i64x8(), i64x8::splat(63)),
| 913 | )) |
| 914 | } |
| 915 | } |
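
// Illustrative per-lane model for the 64-bit SHRDV family (a restatement of the doc text
// above, not a crate API): b supplies the upper half of the 128-bit concatenation, the
// count is reduced modulo 64, and the lower 64 bits of the shifted value are kept.
#[cfg(test)]
#[allow(dead_code)]
fn shrdv64_lane_model(a: u64, b: u64, c: u64) -> u64 {
    let s = (c & 63) as u32; // shift count modulo 64, matching the splat(63) mask above
    let concat = (u128::from(b) << 64) | u128::from(a); // b:a, with b as the high half
    (concat >> s) as u64 // truncation keeps the lower 64 bits
}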
| 916 | |
| 917 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
| 918 | /// |
| 919 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi64&expand=5139) |
| 920 | #[inline ] |
| 921 | #[target_feature (enable = "avx512vbmi2" )] |
| 922 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 923 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
| 924 | pub fn _mm512_mask_shrdv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i { |
| 925 | unsafe { |
| 926 | let shf: i64x8 = _mm512_shrdv_epi64(a, b, c).as_i64x8(); |
| 927 | transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
| 928 | } |
| 929 | } |
| 930 | |
| 931 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 932 | /// |
| 933 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi64&expand=5140) |
| 934 | #[inline ] |
| 935 | #[target_feature (enable = "avx512vbmi2" )] |
| 936 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 937 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
| 938 | pub fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
| 939 | unsafe { |
| 940 | let shf: i64x8 = _mm512_shrdv_epi64(a, b, c).as_i64x8(); |
| 941 | transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
| 942 | } |
| 943 | } |
| 944 | |
| 945 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst. |
| 946 | /// |
| 947 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi64&expand=5138) |
| 948 | #[inline ] |
| 949 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 950 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 951 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
| 952 | pub fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
| 953 | unsafe { |
| 954 | transmute(simd_funnel_shr(
| 955 | b.as_i64x4(),
| 956 | a.as_i64x4(),
| 957 | simd_and(c.as_i64x4(), i64x4::splat(63)),
| 958 | )) |
| 959 | } |
| 960 | } |
| 961 | |
| 962 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
| 963 | /// |
| 964 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi64&expand=5136) |
| 965 | #[inline ] |
| 966 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 967 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 968 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
| 969 | pub fn _mm256_mask_shrdv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { |
| 970 | unsafe { |
| 971 | let shf: i64x4 = _mm256_shrdv_epi64(a, b, c).as_i64x4(); |
| 972 | transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
| 973 | } |
| 974 | } |
| 975 | |
| 976 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 977 | /// |
| 978 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi64&expand=5137) |
| 979 | #[inline ] |
| 980 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 981 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 982 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
| 983 | pub fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
| 984 | unsafe { |
| 985 | let shf: i64x4 = _mm256_shrdv_epi64(a, b, c).as_i64x4(); |
| 986 | transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
| 987 | } |
| 988 | } |
| 989 | |
| 990 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst. |
| 991 | /// |
| 992 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi64&expand=5135) |
| 993 | #[inline ] |
| 994 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 995 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 996 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
| 997 | pub fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
| 998 | unsafe { |
| 999 | transmute(simd_funnel_shr(
| 1000 | b.as_i64x2(),
| 1001 | a.as_i64x2(),
| 1002 | simd_and(c.as_i64x2(), i64x2::splat(63)),
| 1003 | )) |
| 1004 | } |
| 1005 | } |
| 1006 | |
| 1007 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
| 1008 | /// |
| 1009 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi64&expand=5133) |
| 1010 | #[inline ] |
| 1011 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1012 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1013 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
| 1014 | pub fn _mm_mask_shrdv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { |
| 1015 | unsafe { |
| 1016 | let shf: i64x2 = _mm_shrdv_epi64(a, b, c).as_i64x2(); |
| 1017 | transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
| 1018 | } |
| 1019 | } |
| 1020 | |
| 1021 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1022 | /// |
| 1023 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi64&expand=5134) |
| 1024 | #[inline ] |
| 1025 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1026 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1027 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
| 1028 | pub fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
| 1029 | unsafe { |
| 1030 | let shf: i64x2 = _mm_shrdv_epi64(a, b, c).as_i64x2(); |
| 1031 | transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
| 1032 | } |
| 1033 | } |
| 1034 | |
| 1035 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst. |
| 1036 | /// |
| 1037 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi32&expand=5132) |
| 1038 | #[inline ] |
| 1039 | #[target_feature (enable = "avx512vbmi2" )] |
| 1040 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1041 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
| 1042 | pub fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
| 1043 | unsafe { |
| 1044 | transmute(simd_funnel_shr(
| 1045 | b.as_i32x16(),
| 1046 | a.as_i32x16(),
| 1047 | simd_and(c.as_i32x16(), i32x16::splat(31)),
| 1048 | )) |
| 1049 | } |
| 1050 | } |
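
// Worked example for a single 32-bit lane of the SHRDV intrinsics (values chosen purely
// for illustration): with a = 0x0000_00FF, b = 0x0000_0001 and a count of 8, the 64-bit
// concatenation b:a is 0x0000_0001_0000_00FF; shifting right by 8 and keeping the low
// 32 bits gives 0x0100_0000, i.e. (a >> 8) | (b << 24). Counts are reduced modulo 32.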
| 1051 | |
| 1052 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
| 1053 | /// |
| 1054 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi32&expand=5130) |
| 1055 | #[inline ] |
| 1056 | #[target_feature (enable = "avx512vbmi2" )] |
| 1057 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1058 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
| 1059 | pub fn _mm512_mask_shrdv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i { |
| 1060 | unsafe { |
| 1061 | let shf: i32x16 = _mm512_shrdv_epi32(a, b, c).as_i32x16(); |
| 1062 | transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
| 1063 | } |
| 1064 | } |
| 1065 | |
| 1066 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1067 | /// |
| 1068 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi32&expand=5131) |
| 1069 | #[inline ] |
| 1070 | #[target_feature (enable = "avx512vbmi2" )] |
| 1071 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1072 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
| 1073 | pub fn _mm512_maskz_shrdv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
| 1074 | unsafe { |
| 1075 | let shf: i32x16 = _mm512_shrdv_epi32(a, b, c).as_i32x16(); |
| 1076 | transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
| 1077 | } |
| 1078 | } |
| 1079 | |
| 1080 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst. |
| 1081 | /// |
| 1082 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi32&expand=5129) |
| 1083 | #[inline ] |
| 1084 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1085 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1086 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
| 1087 | pub fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
| 1088 | unsafe { |
| 1089 | transmute(simd_funnel_shr(
| 1090 | b.as_i32x8(),
| 1091 | a.as_i32x8(),
| 1092 | simd_and(c.as_i32x8(), i32x8::splat(31)),
| 1093 | )) |
| 1094 | } |
| 1095 | } |
| 1096 | |
| 1097 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
| 1098 | /// |
| 1099 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi32&expand=5127) |
| 1100 | #[inline ] |
| 1101 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1102 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1103 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
| 1104 | pub fn _mm256_mask_shrdv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { |
| 1105 | unsafe { |
| 1106 | let shf: i32x8 = _mm256_shrdv_epi32(a, b, c).as_i32x8(); |
| 1107 | transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
| 1108 | } |
| 1109 | } |
| 1110 | |
| 1111 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1112 | /// |
| 1113 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi32&expand=5128) |
| 1114 | #[inline ] |
| 1115 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1116 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1117 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
| 1118 | pub fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
| 1119 | unsafe { |
| 1120 | let shf: i32x8 = _mm256_shrdv_epi32(a, b, c).as_i32x8(); |
| 1121 | transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
| 1122 | } |
| 1123 | } |
| 1124 | |
| 1125 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst. |
| 1126 | /// |
| 1127 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi32&expand=5126) |
| 1128 | #[inline ] |
| 1129 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1130 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1131 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
| 1132 | pub fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
| 1133 | unsafe { |
| 1134 | transmute(simd_funnel_shr(
| 1135 | b.as_i32x4(),
| 1136 | a.as_i32x4(),
| 1137 | simd_and(c.as_i32x4(), i32x4::splat(31)),
| 1138 | )) |
| 1139 | } |
| 1140 | } |
| 1141 | |
| 1142 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
| 1143 | /// |
| 1144 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi32&expand=5124) |
| 1145 | #[inline ] |
| 1146 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1147 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1148 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
| 1149 | pub fn _mm_mask_shrdv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { |
| 1150 | unsafe { |
| 1151 | let shf: i32x4 = _mm_shrdv_epi32(a, b, c).as_i32x4(); |
| 1152 | transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
| 1153 | } |
| 1154 | } |
| 1155 | |
| 1156 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1157 | /// |
| 1158 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi32&expand=5125) |
| 1159 | #[inline ] |
| 1160 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1161 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1162 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
| 1163 | pub fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
| 1164 | unsafe { |
| 1165 | let shf: i32x4 = _mm_shrdv_epi32(a, b, c).as_i32x4(); |
| 1166 | transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
| 1167 | } |
| 1168 | } |
| 1169 | |
| 1170 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst. |
| 1171 | /// |
| 1172 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi16&expand=5123) |
| 1173 | #[inline ] |
| 1174 | #[target_feature (enable = "avx512vbmi2" )] |
| 1175 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1176 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
| 1177 | pub fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
| 1178 | unsafe { |
| 1179 | transmute(simd_funnel_shr(
| 1180 | b.as_i16x32(),
| 1181 | a.as_i16x32(),
| 1182 | simd_and(c.as_i16x32(), i16x32::splat(15)),
| 1183 | )) |
| 1184 | } |
| 1185 | } |
| 1186 | |
| 1187 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
| 1188 | /// |
| 1189 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi16&expand=5121) |
| 1190 | #[inline ] |
| 1191 | #[target_feature (enable = "avx512vbmi2" )] |
| 1192 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1193 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
| 1194 | pub fn _mm512_mask_shrdv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i { |
| 1195 | unsafe { |
| 1196 | let shf: i16x32 = _mm512_shrdv_epi16(a, b, c).as_i16x32(); |
| 1197 | transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
| 1198 | } |
| 1199 | } |
| 1200 | |
| 1201 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1202 | /// |
| 1203 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi16&expand=5122) |
| 1204 | #[inline ] |
| 1205 | #[target_feature (enable = "avx512vbmi2" )] |
| 1206 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1207 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
| 1208 | pub fn _mm512_maskz_shrdv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
| 1209 | unsafe { |
| 1210 | let shf: i16x32 = _mm512_shrdv_epi16(a, b, c).as_i16x32(); |
| 1211 | transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
| 1212 | } |
| 1213 | } |
| 1214 | |
| 1215 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst. |
| 1216 | /// |
| 1217 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi16&expand=5120) |
| 1218 | #[inline ] |
| 1219 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1220 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1221 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
| 1222 | pub fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
| 1223 | unsafe { |
| 1224 | transmute(simd_funnel_shr(
| 1225 | b.as_i16x16(),
| 1226 | a.as_i16x16(),
| 1227 | simd_and(c.as_i16x16(), i16x16::splat(15)),
| 1228 | )) |
| 1229 | } |
| 1230 | } |
| 1231 | |
| 1232 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
| 1233 | /// |
| 1234 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi16&expand=5118) |
| 1235 | #[inline ] |
| 1236 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1237 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1238 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
| 1239 | pub fn _mm256_mask_shrdv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i { |
| 1240 | unsafe { |
| 1241 | let shf: i16x16 = _mm256_shrdv_epi16(a, b, c).as_i16x16(); |
| 1242 | transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
| 1243 | } |
| 1244 | } |
| 1245 | |
| 1246 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1247 | /// |
| 1248 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi16&expand=5119) |
| 1249 | #[inline ] |
| 1250 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1251 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1252 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
| 1253 | pub fn _mm256_maskz_shrdv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
| 1254 | unsafe { |
| 1255 | let shf: i16x16 = _mm256_shrdv_epi16(a, b, c).as_i16x16(); |
| 1256 | transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
| 1257 | } |
| 1258 | } |
| 1259 | |
| 1260 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst. |
| 1261 | /// |
| 1262 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi16&expand=5117) |
| 1263 | #[inline ] |
| 1264 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1265 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1266 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
| 1267 | pub fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
| 1268 | unsafe { |
| 1269 | transmute(simd_funnel_shr(
| 1270 | b.as_i16x8(),
| 1271 | a.as_i16x8(),
| 1272 | simd_and(c.as_i16x8(), i16x8::splat(15)),
| 1273 | )) |
| 1274 | } |
| 1275 | } |
| 1276 | |
| 1277 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
| 1278 | /// |
| 1279 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi16&expand=5115) |
| 1280 | #[inline ] |
| 1281 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1282 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1283 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
| 1284 | pub fn _mm_mask_shrdv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { |
| 1285 | unsafe { |
| 1286 | let shf: i16x8 = _mm_shrdv_epi16(a, b, c).as_i16x8(); |
| 1287 | transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
| 1288 | } |
| 1289 | } |
| 1290 | |
| 1291 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1292 | /// |
| 1293 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi16&expand=5116) |
| 1294 | #[inline ] |
| 1295 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1296 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1297 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
| 1298 | pub fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
| 1299 | unsafe { |
| 1300 | let shf: i16x8 = _mm_shrdv_epi16(a, b, c).as_i16x8(); |
| 1301 | transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
| 1302 | } |
| 1303 | } |
| 1304 | |
| 1305 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
| 1306 | /// |
| 1307 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi64&expand=5060) |
| 1308 | #[inline ] |
| 1309 | #[target_feature (enable = "avx512vbmi2" )] |
| 1310 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1311 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
| 1312 | #[rustc_legacy_const_generics (2)] |
| 1313 | pub fn _mm512_shldi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { |
| 1314 | static_assert_uimm_bits!(IMM8, 8); |
| 1315 | _mm512_shldv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64))
| 1316 | } |
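
// Hedged usage sketch for the immediate form above (the intrinsic calls are real; the
// helper function and values are illustrative): with IMM8 = 5 every lane becomes
// (a << 5) | (b >> 59), since the splatted count goes through the variable-count path.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vbmi2")]
unsafe fn shldi_epi64_usage() -> __m512i {
    let a = _mm512_set1_epi64(1); // 0x...0001 in every lane
    let b = _mm512_set1_epi64(i64::MIN); // only the top bit set in every lane
    _mm512_shldi_epi64::<5>(a, b) // each lane: (1 << 5) | (top bit >> 59) = 0x30
}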
| 1317 | |
| 1318 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 1319 | /// |
| 1320 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi64&expand=5058) |
| 1321 | #[inline ] |
| 1322 | #[target_feature (enable = "avx512vbmi2" )] |
| 1323 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1324 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
| 1325 | #[rustc_legacy_const_generics (4)] |
| 1326 | pub fn _mm512_mask_shldi_epi64<const IMM8: i32>( |
| 1327 | src: __m512i, |
| 1328 | k: __mmask8, |
| 1329 | a: __m512i, |
| 1330 | b: __m512i, |
| 1331 | ) -> __m512i { |
| 1332 | unsafe { |
| 1333 | static_assert_uimm_bits!(IMM8, 8); |
| 1334 | let shf: i64x8 = _mm512_shldi_epi64::<IMM8>(a, b).as_i64x8(); |
| 1335 | transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
| 1336 | } |
| 1337 | } |
| 1338 | |
| 1339 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1340 | /// |
| 1341 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi64&expand=5059) |
| 1342 | #[inline ] |
| 1343 | #[target_feature (enable = "avx512vbmi2" )] |
| 1344 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1345 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
| 1346 | #[rustc_legacy_const_generics (3)] |
| 1347 | pub fn _mm512_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { |
| 1348 | unsafe { |
| 1349 | static_assert_uimm_bits!(IMM8, 8); |
| 1350 | let shf: i64x8 = _mm512_shldi_epi64::<IMM8>(a, b).as_i64x8(); |
| 1351 | transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
| 1352 | } |
| 1353 | } |
| 1354 | |
| 1355 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
| 1356 | /// |
| 1357 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi64&expand=5057) |
| 1358 | #[inline ] |
| 1359 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1360 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1361 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
| 1362 | #[rustc_legacy_const_generics (2)] |
| 1363 | pub fn _mm256_shldi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
| 1364 | static_assert_uimm_bits!(IMM8, 8); |
| 1365 | _mm256_shldv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64))
| 1366 | } |
| 1367 | |
| 1368 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 1369 | /// |
| 1370 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi64&expand=5055) |
| 1371 | #[inline ] |
| 1372 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1373 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1374 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
| 1375 | #[rustc_legacy_const_generics (4)] |
| 1376 | pub fn _mm256_mask_shldi_epi64<const IMM8: i32>( |
| 1377 | src: __m256i, |
| 1378 | k: __mmask8, |
| 1379 | a: __m256i, |
| 1380 | b: __m256i, |
| 1381 | ) -> __m256i { |
| 1382 | unsafe { |
| 1383 | static_assert_uimm_bits!(IMM8, 8); |
| 1384 | let shf: i64x4 = _mm256_shldi_epi64::<IMM8>(a, b).as_i64x4(); |
| 1385 | transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
| 1386 | } |
| 1387 | } |
| 1388 | |
| 1389 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1390 | /// |
| 1391 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi64&expand=5056) |
| 1392 | #[inline ] |
| 1393 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1394 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1395 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
| 1396 | #[rustc_legacy_const_generics (3)] |
| 1397 | pub fn _mm256_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { |
| 1398 | unsafe { |
| 1399 | static_assert_uimm_bits!(IMM8, 8); |
| 1400 | let shf: i64x4 = _mm256_shldi_epi64::<IMM8>(a, b).as_i64x4(); |
| 1401 | transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
| 1402 | } |
| 1403 | } |
| 1404 | |
| 1405 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
| 1406 | /// |
| 1407 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi64&expand=5054) |
| 1408 | #[inline ] |
| 1409 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1410 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1411 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
| 1412 | #[rustc_legacy_const_generics (2)] |
| 1413 | pub fn _mm_shldi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
| 1414 | static_assert_uimm_bits!(IMM8, 8); |
| 1415 | _mm_shldv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64))
| 1416 | } |
| 1417 | |
| 1418 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 1419 | /// |
| 1420 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi64&expand=5052) |
| 1421 | #[inline ] |
| 1422 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1423 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1424 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
| 1425 | #[rustc_legacy_const_generics (4)] |
| 1426 | pub fn _mm_mask_shldi_epi64<const IMM8: i32>( |
| 1427 | src: __m128i, |
| 1428 | k: __mmask8, |
| 1429 | a: __m128i, |
| 1430 | b: __m128i, |
| 1431 | ) -> __m128i { |
| 1432 | unsafe { |
| 1433 | static_assert_uimm_bits!(IMM8, 8); |
| 1434 | let shf: i64x2 = _mm_shldi_epi64::<IMM8>(a, b).as_i64x2(); |
| 1435 | transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
| 1436 | } |
| 1437 | } |
| 1438 | |
| 1439 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1440 | /// |
| 1441 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi64&expand=5053) |
| 1442 | #[inline ] |
| 1443 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1444 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1445 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
| 1446 | #[rustc_legacy_const_generics (3)] |
| 1447 | pub fn _mm_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
| 1448 | unsafe { |
| 1449 | static_assert_uimm_bits!(IMM8, 8); |
| 1450 | let shf: i64x2 = _mm_shldi_epi64::<IMM8>(a, b).as_i64x2(); |
| 1451 | transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
| 1452 | } |
| 1453 | } |
| 1454 | |
| 1455 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst. |
| 1456 | /// |
| 1457 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi32&expand=5051) |
| 1458 | #[inline ] |
| 1459 | #[target_feature (enable = "avx512vbmi2" )] |
| 1460 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1461 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
| 1462 | #[rustc_legacy_const_generics (2)] |
| 1463 | pub fn _mm512_shldi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { |
| 1464 | static_assert_uimm_bits!(IMM8, 8); |
| 1465 | _mm512_shldv_epi32(a, b, _mm512_set1_epi32(IMM8))
| 1466 | } |
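
// Note on the 32-bit immediate forms: IMM8 is only validated to fit in 8 bits and is then
// splatted into the variable-count path, which reduces the count modulo 32, so in this
// implementation an out-of-range immediate such as 33 behaves like a count of 1.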
| 1467 | |
| 1468 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 1469 | /// |
| 1470 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi32&expand=5049) |
| 1471 | #[inline ] |
| 1472 | #[target_feature (enable = "avx512vbmi2" )] |
| 1473 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1474 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
| 1475 | #[rustc_legacy_const_generics (4)] |
| 1476 | pub fn _mm512_mask_shldi_epi32<const IMM8: i32>( |
| 1477 | src: __m512i, |
| 1478 | k: __mmask16, |
| 1479 | a: __m512i, |
| 1480 | b: __m512i, |
| 1481 | ) -> __m512i { |
| 1482 | unsafe { |
| 1483 | static_assert_uimm_bits!(IMM8, 8); |
| 1484 | let shf: i32x16 = _mm512_shldi_epi32::<IMM8>(a, b).as_i32x16(); |
| 1485 | transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
| 1486 | } |
| 1487 | } |
| 1488 | |
| 1489 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1490 | /// |
| 1491 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi32&expand=5050) |
| 1492 | #[inline ] |
| 1493 | #[target_feature (enable = "avx512vbmi2" )] |
| 1494 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1495 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
| 1496 | #[rustc_legacy_const_generics (3)] |
| 1497 | pub fn _mm512_maskz_shldi_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { |
| 1498 | unsafe { |
| 1499 | static_assert_uimm_bits!(IMM8, 8); |
| 1500 | let shf: i32x16 = _mm512_shldi_epi32::<IMM8>(a, b).as_i32x16(); |
| 1501 | transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
| 1502 | } |
| 1503 | } |
| 1504 | |
| 1505 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst. |
| 1506 | /// |
| 1507 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi32&expand=5048) |
| 1508 | #[inline ] |
| 1509 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1510 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1511 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
| 1512 | #[rustc_legacy_const_generics (2)] |
| 1513 | pub fn _mm256_shldi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
| 1514 | static_assert_uimm_bits!(IMM8, 8); |
| 1515 | _mm256_shldv_epi32(a, b, _mm256_set1_epi32(IMM8))
| 1516 | } |
| 1517 | |
| 1518 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 1519 | /// |
| 1520 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi32&expand=5046) |
| 1521 | #[inline ] |
| 1522 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1523 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1524 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
| 1525 | #[rustc_legacy_const_generics (4)] |
| 1526 | pub fn _mm256_mask_shldi_epi32<const IMM8: i32>( |
| 1527 | src: __m256i, |
| 1528 | k: __mmask8, |
| 1529 | a: __m256i, |
| 1530 | b: __m256i, |
| 1531 | ) -> __m256i { |
| 1532 | unsafe { |
| 1533 | static_assert_uimm_bits!(IMM8, 8); |
| 1534 | let shf: i32x8 = _mm256_shldi_epi32::<IMM8>(a, b).as_i32x8(); |
| 1535 | transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
| 1536 | } |
| 1537 | } |
| 1538 | |
| 1539 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1540 | /// |
| 1541 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi32&expand=5047) |
| 1542 | #[inline ] |
| 1543 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1544 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1545 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
| 1546 | #[rustc_legacy_const_generics (3)] |
| 1547 | pub fn _mm256_maskz_shldi_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { |
| 1548 | unsafe { |
| 1549 | static_assert_uimm_bits!(IMM8, 8); |
| 1550 | let shf: i32x8 = _mm256_shldi_epi32::<IMM8>(a, b).as_i32x8(); |
| 1551 | transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
| 1552 | } |
| 1553 | } |
| 1554 | |
| 1555 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst. |
| 1556 | /// |
| 1557 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi32&expand=5045) |
| 1558 | #[inline ] |
| 1559 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1560 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1561 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
| 1562 | #[rustc_legacy_const_generics (2)] |
| 1563 | pub fn _mm_shldi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
| 1564 | static_assert_uimm_bits!(IMM8, 8); |
| 1565 | _mm_shldv_epi32(a, b, _mm_set1_epi32(IMM8))
| 1566 | } |
| 1567 | |
| 1568 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 1569 | /// |
| 1570 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi32&expand=5043) |
| 1571 | #[inline ] |
| 1572 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1573 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1574 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
| 1575 | #[rustc_legacy_const_generics (4)] |
| 1576 | pub fn _mm_mask_shldi_epi32<const IMM8: i32>( |
| 1577 | src: __m128i, |
| 1578 | k: __mmask8, |
| 1579 | a: __m128i, |
| 1580 | b: __m128i, |
| 1581 | ) -> __m128i { |
| 1582 | unsafe { |
| 1583 | static_assert_uimm_bits!(IMM8, 8); |
| 1584 | let shf: i32x4 = _mm_shldi_epi32::<IMM8>(a, b).as_i32x4(); |
| 1585 | transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
| 1586 | } |
| 1587 | } |
| 1588 | |
| 1589 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1590 | /// |
| 1591 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi32&expand=5044) |
| 1592 | #[inline ] |
| 1593 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1594 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1595 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
| 1596 | #[rustc_legacy_const_generics (3)] |
| 1597 | pub fn _mm_maskz_shldi_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
| 1598 | unsafe { |
| 1599 | static_assert_uimm_bits!(IMM8, 8); |
| 1600 | let shf: i32x4 = _mm_shldi_epi32::<IMM8>(a, b).as_i32x4(); |
| 1601 | transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
| 1602 | } |
| 1603 | } |
| 1604 | |
| 1605 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
| 1606 | /// |
| 1607 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi16&expand=5042) |
| 1608 | #[inline ] |
| 1609 | #[target_feature (enable = "avx512vbmi2" )] |
| 1610 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1611 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
| 1612 | #[rustc_legacy_const_generics (2)] |
| 1613 | pub fn _mm512_shldi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { |
| 1614 | static_assert_uimm_bits!(IMM8, 8); |
| 1615 | _mm512_shldv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16))
| 1616 | } |
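
// Illustrative lane-level expansion of the 16-bit immediate form above: for IMM8 = 5 each
// 16-bit lane of the result is (a << 5) | (b >> 11); the 512-bit intrinsic applies this
// to all 32 lanes via the splatted-count variable shift.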
| 1617 | |
| 1618 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 1619 | /// |
| 1620 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi16&expand=5040) |
| 1621 | #[inline ] |
| 1622 | #[target_feature (enable = "avx512vbmi2" )] |
| 1623 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1624 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
| 1625 | #[rustc_legacy_const_generics (4)] |
| 1626 | pub fn _mm512_mask_shldi_epi16<const IMM8: i32>( |
| 1627 | src: __m512i, |
| 1628 | k: __mmask32, |
| 1629 | a: __m512i, |
| 1630 | b: __m512i, |
| 1631 | ) -> __m512i { |
| 1632 | unsafe { |
| 1633 | static_assert_uimm_bits!(IMM8, 8); |
| 1634 | let shf: i16x32 = _mm512_shldi_epi16::<IMM8>(a, b).as_i16x32(); |
| 1635 | transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
| 1636 | } |
| 1637 | } |
| 1638 | |
| 1639 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1640 | /// |
| 1641 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi16&expand=5041) |
| 1642 | #[inline ] |
| 1643 | #[target_feature (enable = "avx512vbmi2" )] |
| 1644 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1645 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
| 1646 | #[rustc_legacy_const_generics (3)] |
| 1647 | pub fn _mm512_maskz_shldi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
| 1648 | unsafe { |
| 1649 | static_assert_uimm_bits!(IMM8, 8); |
| 1650 | let shf: i16x32 = _mm512_shldi_epi16::<IMM8>(a, b).as_i16x32(); |
| 1651 | transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
| 1652 | } |
| 1653 | } |
| 1654 | |
| 1655 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
| 1656 | /// |
| 1657 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi16&expand=5039) |
| 1658 | #[inline ] |
| 1659 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1660 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1661 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
| 1662 | #[rustc_legacy_const_generics (2)] |
| 1663 | pub fn _mm256_shldi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
| 1664 | static_assert_uimm_bits!(IMM8, 8); |
| 1665 | _mm256_shldv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16))
| 1666 | } |
| 1667 | |
| 1668 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 1669 | /// |
| 1670 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi16&expand=5037) |
| 1671 | #[inline ] |
| 1672 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1673 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1674 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
| 1675 | #[rustc_legacy_const_generics (4)] |
| 1676 | pub fn _mm256_mask_shldi_epi16<const IMM8: i32>( |
| 1677 | src: __m256i, |
| 1678 | k: __mmask16, |
| 1679 | a: __m256i, |
| 1680 | b: __m256i, |
| 1681 | ) -> __m256i { |
| 1682 | unsafe { |
| 1683 | static_assert_uimm_bits!(IMM8, 8); |
| 1684 | let shf: i16x16 = _mm256_shldi_epi16::<IMM8>(a, b).as_i16x16(); |
| 1685 | transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
| 1686 | } |
| 1687 | } |
| 1688 | |
| 1689 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1690 | /// |
| 1691 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi16&expand=5038) |
| 1692 | #[inline ] |
| 1693 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1694 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1695 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
| 1696 | #[rustc_legacy_const_generics (3)] |
| 1697 | pub fn _mm256_maskz_shldi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
| 1698 | unsafe { |
| 1699 | static_assert_uimm_bits!(IMM8, 8); |
| 1700 | let shf: i16x16 = _mm256_shldi_epi16::<IMM8>(a, b).as_i16x16(); |
| 1701 | transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
| 1702 | } |
| 1703 | } |
| 1704 | |
| 1705 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
| 1706 | /// |
| 1707 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi16&expand=5036) |
| 1708 | #[inline ] |
| 1709 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1710 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1711 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
| 1712 | #[rustc_legacy_const_generics (2)] |
| 1713 | pub fn _mm_shldi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
| 1714 | static_assert_uimm_bits!(IMM8, 8); |
| 1715 | _mm_shldv_epi16(a, b, _mm_set1_epi16(IMM8 as i16))
| 1716 | } |
| 1717 | |
| 1718 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 1719 | /// |
| 1720 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi16&expand=5034) |
| 1721 | #[inline ] |
| 1722 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1723 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1724 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
| 1725 | #[rustc_legacy_const_generics (4)] |
| 1726 | pub fn _mm_mask_shldi_epi16<const IMM8: i32>( |
| 1727 | src: __m128i, |
| 1728 | k: __mmask8, |
| 1729 | a: __m128i, |
| 1730 | b: __m128i, |
| 1731 | ) -> __m128i { |
| 1732 | unsafe { |
| 1733 | static_assert_uimm_bits!(IMM8, 8); |
| 1734 | let shf: i16x8 = _mm_shldi_epi16::<IMM8>(a, b).as_i16x8(); |
| 1735 | transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
| 1736 | } |
| 1737 | } |
| 1738 | |
| 1739 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1740 | /// |
| 1741 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi16&expand=5035) |
| 1742 | #[inline ] |
| 1743 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1744 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1745 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
| 1746 | #[rustc_legacy_const_generics (3)] |
| 1747 | pub fn _mm_maskz_shldi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
| 1748 | unsafe { |
| 1749 | static_assert_uimm_bits!(IMM8, 8); |
| 1750 | let shf: i16x8 = _mm_shldi_epi16::<IMM8>(a, b).as_i16x8(); |
| 1751 | transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
| 1752 | } |
| 1753 | } |
| 1754 | |
| 1755 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst. |
| 1756 | /// |
| 1757 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi64&expand=5114) |
| 1758 | #[inline ] |
| 1759 | #[target_feature (enable = "avx512vbmi2" )] |
| 1760 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1761 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
| 1762 | #[rustc_legacy_const_generics (2)] |
| 1763 | pub fn _mm512_shrdi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { |
| 1764 | static_assert_uimm_bits!(IMM8, 8); |
| 1765 | _mm512_shrdv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64))
| 1766 | } |
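
// Minimal sketch (hypothetical helper, assuming AVX-512 VBMI2 support) showing that the
// immediate right-shift form above is just the variable form with a broadcast count:
// both results below are identical vectors.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vbmi2")]
unsafe fn shrdi_equals_shrdv(a: __m512i, b: __m512i) -> (__m512i, __m512i) {
    let by_imm = _mm512_shrdi_epi64::<7>(a, b); // count fixed at compile time
    let by_var = _mm512_shrdv_epi64(a, b, _mm512_set1_epi64(7)); // same count in every lane
    (by_imm, by_var)
}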
| 1767 | |
| 1768 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
| 1769 | /// |
| 1770 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi64&expand=5112) |
| 1771 | #[inline ] |
| 1772 | #[target_feature (enable = "avx512vbmi2" )] |
| 1773 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1774 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
| 1775 | #[rustc_legacy_const_generics (4)] |
| 1776 | pub fn _mm512_mask_shrdi_epi64<const IMM8: i32>( |
| 1777 | src: __m512i, |
| 1778 | k: __mmask8, |
| 1779 | a: __m512i, |
| 1780 | b: __m512i, |
| 1781 | ) -> __m512i { |
| 1782 | unsafe { |
| 1783 | static_assert_uimm_bits!(IMM8, 8); |
| 1784 | let shf: i64x8 = _mm512_shrdi_epi64::<IMM8>(a, b).as_i64x8(); |
| 1785 | transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
| 1786 | } |
| 1787 | } |
| 1788 | |
| 1789 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1790 | /// |
| 1791 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi64&expand=5113) |
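///
/// A minimal illustrative sketch (added for clarity, not taken from Intel's
/// documentation); inactive lanes are zeroed:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi64(4);
///     let b = _mm512_set1_epi64(1);
///     // Lanes 0..=3 take ((b:a) >> 2); lanes 4..=7 become 0.
///     let r = _mm512_maskz_shrdi_epi64::<2>(0b0000_1111, a, b);
///     let _ = r;
/// }
/// ```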
| 1792 | #[inline ] |
| 1793 | #[target_feature (enable = "avx512vbmi2" )] |
| 1794 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1795 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 255))] //should be vpshrdq |
| 1796 | #[rustc_legacy_const_generics (3)] |
| 1797 | pub fn _mm512_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { |
| 1798 | unsafe { |
| 1799 | static_assert_uimm_bits!(IMM8, 8); |
| 1800 | let shf: i64x8 = _mm512_shrdi_epi64::<IMM8>(a, b).as_i64x8(); |
transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
| 1802 | } |
| 1803 | } |
| 1804 | |
| 1805 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst. |
| 1806 | /// |
| 1807 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi64&expand=5111) |
| 1808 | #[inline ] |
| 1809 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1810 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1811 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
| 1812 | #[rustc_legacy_const_generics (2)] |
| 1813 | pub fn _mm256_shrdi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
| 1814 | static_assert_uimm_bits!(IMM8, 8); |
_mm256_shrdv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64))
| 1816 | } |
| 1817 | |
| 1818 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src" when the corresponding mask bit is not set). |
| 1819 | /// |
| 1820 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi64&expand=5109) |
| 1821 | #[inline ] |
| 1822 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1823 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1824 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
| 1825 | #[rustc_legacy_const_generics (4)] |
| 1826 | pub fn _mm256_mask_shrdi_epi64<const IMM8: i32>( |
| 1827 | src: __m256i, |
| 1828 | k: __mmask8, |
| 1829 | a: __m256i, |
| 1830 | b: __m256i, |
| 1831 | ) -> __m256i { |
| 1832 | unsafe { |
| 1833 | static_assert_uimm_bits!(IMM8, 8); |
| 1834 | let shf: i64x4 = _mm256_shrdi_epi64::<IMM8>(a, b).as_i64x4(); |
transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
| 1836 | } |
| 1837 | } |
| 1838 | |
| 1839 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1840 | /// |
| 1841 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi64&expand=5110) |
| 1842 | #[inline ] |
| 1843 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1844 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1845 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
| 1846 | #[rustc_legacy_const_generics (3)] |
| 1847 | pub fn _mm256_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { |
| 1848 | unsafe { |
| 1849 | static_assert_uimm_bits!(IMM8, 8); |
| 1850 | let shf: i64x4 = _mm256_shrdi_epi64::<IMM8>(a, b).as_i64x4(); |
transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
| 1852 | } |
| 1853 | } |
| 1854 | |
| 1855 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst. |
| 1856 | /// |
| 1857 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi64&expand=5108) |
| 1858 | #[inline ] |
| 1859 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1860 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1861 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
| 1862 | #[rustc_legacy_const_generics (2)] |
| 1863 | pub fn _mm_shrdi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
| 1864 | static_assert_uimm_bits!(IMM8, 8); |
_mm_shrdv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64))
| 1866 | } |
| 1867 | |
| 1868 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src" when the corresponding mask bit is not set). |
| 1869 | /// |
| 1870 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi64&expand=5106) |
| 1871 | #[inline ] |
| 1872 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1873 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1874 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
| 1875 | #[rustc_legacy_const_generics (4)] |
| 1876 | pub fn _mm_mask_shrdi_epi64<const IMM8: i32>( |
| 1877 | src: __m128i, |
| 1878 | k: __mmask8, |
| 1879 | a: __m128i, |
| 1880 | b: __m128i, |
| 1881 | ) -> __m128i { |
| 1882 | unsafe { |
| 1883 | static_assert_uimm_bits!(IMM8, 8); |
| 1884 | let shf: i64x2 = _mm_shrdi_epi64::<IMM8>(a, b).as_i64x2(); |
transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
| 1886 | } |
| 1887 | } |
| 1888 | |
| 1889 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1890 | /// |
| 1891 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi64&expand=5107) |
| 1892 | #[inline ] |
| 1893 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1894 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 1895 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
| 1896 | #[rustc_legacy_const_generics (3)] |
| 1897 | pub fn _mm_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
| 1898 | unsafe { |
| 1899 | static_assert_uimm_bits!(IMM8, 8); |
| 1900 | let shf: i64x2 = _mm_shrdi_epi64::<IMM8>(a, b).as_i64x2(); |
transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
| 1902 | } |
| 1903 | } |
| 1904 | |
| 1905 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst. |
| 1906 | /// |
| 1907 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi32&expand=5105) |
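///
/// A minimal illustrative sketch (added for clarity, not taken from Intel's
/// documentation):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(4); // low half of each 64-bit concatenation
///     let b = _mm512_set1_epi32(1); // high half
///     // ((b:a) >> 2) keeps the lower 32 bits: every lane becomes 0x4000_0001.
///     let r = _mm512_shrdi_epi32::<2>(a, b);
///     let _ = r;
/// }
/// ```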
| 1908 | #[inline ] |
| 1909 | #[target_feature (enable = "avx512vbmi2" )] |
| 1910 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
#[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
| 1912 | #[rustc_legacy_const_generics (2)] |
| 1913 | pub fn _mm512_shrdi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { |
| 1914 | static_assert_uimm_bits!(IMM8, 8); |
_mm512_shrdv_epi32(a, b, _mm512_set1_epi32(IMM8))
| 1916 | } |
| 1917 | |
| 1918 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 1919 | /// |
| 1920 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi32&expand=5103) |
| 1921 | #[inline ] |
| 1922 | #[target_feature (enable = "avx512vbmi2" )] |
| 1923 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
#[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
| 1925 | #[rustc_legacy_const_generics (4)] |
| 1926 | pub fn _mm512_mask_shrdi_epi32<const IMM8: i32>( |
| 1927 | src: __m512i, |
| 1928 | k: __mmask16, |
| 1929 | a: __m512i, |
| 1930 | b: __m512i, |
| 1931 | ) -> __m512i { |
| 1932 | unsafe { |
| 1933 | static_assert_uimm_bits!(IMM8, 8); |
| 1934 | let shf: i32x16 = _mm512_shrdi_epi32::<IMM8>(a, b).as_i32x16(); |
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
| 1936 | } |
| 1937 | } |
| 1938 | |
| 1939 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1940 | /// |
| 1941 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi32&expand=5104) |
| 1942 | #[inline ] |
| 1943 | #[target_feature (enable = "avx512vbmi2" )] |
| 1944 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
#[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
| 1946 | #[rustc_legacy_const_generics (3)] |
| 1947 | pub fn _mm512_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { |
| 1948 | unsafe { |
| 1949 | static_assert_uimm_bits!(IMM8, 8); |
| 1950 | let shf: i32x16 = _mm512_shrdi_epi32::<IMM8>(a, b).as_i32x16(); |
transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
| 1952 | } |
| 1953 | } |
| 1954 | |
| 1955 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst. |
| 1956 | /// |
| 1957 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi32&expand=5102) |
| 1958 | #[inline ] |
| 1959 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1960 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
#[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
| 1962 | #[rustc_legacy_const_generics (2)] |
| 1963 | pub fn _mm256_shrdi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
| 1964 | static_assert_uimm_bits!(IMM8, 8); |
_mm256_shrdv_epi32(a, b, _mm256_set1_epi32(IMM8))
| 1966 | } |
| 1967 | |
| 1968 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 1969 | /// |
| 1970 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi32&expand=5100) |
| 1971 | #[inline ] |
| 1972 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1973 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
#[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
| 1975 | #[rustc_legacy_const_generics (4)] |
| 1976 | pub fn _mm256_mask_shrdi_epi32<const IMM8: i32>( |
| 1977 | src: __m256i, |
| 1978 | k: __mmask8, |
| 1979 | a: __m256i, |
| 1980 | b: __m256i, |
| 1981 | ) -> __m256i { |
| 1982 | unsafe { |
| 1983 | static_assert_uimm_bits!(IMM8, 8); |
| 1984 | let shf: i32x8 = _mm256_shrdi_epi32::<IMM8>(a, b).as_i32x8(); |
transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
| 1986 | } |
| 1987 | } |
| 1988 | |
| 1989 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 1990 | /// |
| 1991 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi32&expand=5101) |
| 1992 | #[inline ] |
| 1993 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 1994 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
#[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
| 1996 | #[rustc_legacy_const_generics (3)] |
| 1997 | pub fn _mm256_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { |
| 1998 | unsafe { |
| 1999 | static_assert_uimm_bits!(IMM8, 8); |
| 2000 | let shf: i32x8 = _mm256_shrdi_epi32::<IMM8>(a, b).as_i32x8(); |
transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
| 2002 | } |
| 2003 | } |
| 2004 | |
| 2005 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst. |
| 2006 | /// |
| 2007 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi32&expand=5099) |
| 2008 | #[inline ] |
| 2009 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 2010 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
#[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
| 2012 | #[rustc_legacy_const_generics (2)] |
| 2013 | pub fn _mm_shrdi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
| 2014 | static_assert_uimm_bits!(IMM8, 8); |
_mm_shrdv_epi32(a, b, _mm_set1_epi32(IMM8))
| 2016 | } |
| 2017 | |
| 2018 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 2019 | /// |
| 2020 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi32&expand=5097) |
| 2021 | #[inline ] |
| 2022 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 2023 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
#[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
| 2025 | #[rustc_legacy_const_generics (4)] |
| 2026 | pub fn _mm_mask_shrdi_epi32<const IMM8: i32>( |
| 2027 | src: __m128i, |
| 2028 | k: __mmask8, |
| 2029 | a: __m128i, |
| 2030 | b: __m128i, |
| 2031 | ) -> __m128i { |
| 2032 | unsafe { |
| 2033 | static_assert_uimm_bits!(IMM8, 8); |
| 2034 | let shf: i32x4 = _mm_shrdi_epi32::<IMM8>(a, b).as_i32x4(); |
transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
| 2036 | } |
| 2037 | } |
| 2038 | |
| 2039 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 2040 | /// |
| 2041 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi32&expand=5098) |
| 2042 | #[inline ] |
| 2043 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 2044 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
#[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
| 2046 | #[rustc_legacy_const_generics (3)] |
| 2047 | pub fn _mm_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
| 2048 | unsafe { |
| 2049 | static_assert_uimm_bits!(IMM8, 8); |
| 2050 | let shf: i32x4 = _mm_shrdi_epi32::<IMM8>(a, b).as_i32x4(); |
transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
| 2052 | } |
| 2053 | } |
| 2054 | |
| 2055 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst. |
| 2056 | /// |
| 2057 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi16&expand=5096) |
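///
/// A minimal illustrative sketch (added for clarity, not taken from Intel's
/// documentation):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(4); // low half of each 32-bit concatenation
///     let b = _mm512_set1_epi16(1); // high half
///     // ((b:a) >> 2) keeps the lower 16 bits: every lane becomes 0x4001.
///     let r = _mm512_shrdi_epi16::<2>(a, b);
///     let _ = r;
/// }
/// ```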
| 2058 | #[inline ] |
| 2059 | #[target_feature (enable = "avx512vbmi2" )] |
| 2060 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 2061 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
| 2062 | #[rustc_legacy_const_generics (2)] |
| 2063 | pub fn _mm512_shrdi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { |
| 2064 | static_assert_uimm_bits!(IMM8, 8); |
_mm512_shrdv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16))
| 2066 | } |
| 2067 | |
| 2068 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 2069 | /// |
| 2070 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi16&expand=5094) |
| 2071 | #[inline ] |
| 2072 | #[target_feature (enable = "avx512vbmi2" )] |
| 2073 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 2074 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
| 2075 | #[rustc_legacy_const_generics (4)] |
| 2076 | pub fn _mm512_mask_shrdi_epi16<const IMM8: i32>( |
| 2077 | src: __m512i, |
| 2078 | k: __mmask32, |
| 2079 | a: __m512i, |
| 2080 | b: __m512i, |
| 2081 | ) -> __m512i { |
| 2082 | unsafe { |
| 2083 | static_assert_uimm_bits!(IMM8, 8); |
| 2084 | let shf: i16x32 = _mm512_shrdi_epi16::<IMM8>(a, b).as_i16x32(); |
transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
| 2086 | } |
| 2087 | } |
| 2088 | |
| 2089 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 2090 | /// |
| 2091 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi16&expand=5095) |
| 2092 | #[inline ] |
| 2093 | #[target_feature (enable = "avx512vbmi2" )] |
| 2094 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 2095 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
| 2096 | #[rustc_legacy_const_generics (3)] |
| 2097 | pub fn _mm512_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
| 2098 | unsafe { |
| 2099 | static_assert_uimm_bits!(IMM8, 8); |
| 2100 | let shf: i16x32 = _mm512_shrdi_epi16::<IMM8>(a, b).as_i16x32(); |
transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
| 2102 | } |
| 2103 | } |
| 2104 | |
| 2105 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst. |
| 2106 | /// |
| 2107 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi16&expand=5093) |
| 2108 | #[inline ] |
| 2109 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 2110 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 2111 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
| 2112 | #[rustc_legacy_const_generics (2)] |
| 2113 | pub fn _mm256_shrdi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
| 2114 | static_assert_uimm_bits!(IMM8, 8); |
_mm256_shrdv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16))
| 2116 | } |
| 2117 | |
| 2118 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 2119 | /// |
| 2120 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi16&expand=5091) |
| 2121 | #[inline ] |
| 2122 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 2123 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 2124 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
| 2125 | #[rustc_legacy_const_generics (4)] |
| 2126 | pub fn _mm256_mask_shrdi_epi16<const IMM8: i32>( |
| 2127 | src: __m256i, |
| 2128 | k: __mmask16, |
| 2129 | a: __m256i, |
| 2130 | b: __m256i, |
| 2131 | ) -> __m256i { |
| 2132 | unsafe { |
| 2133 | static_assert_uimm_bits!(IMM8, 8); |
| 2134 | let shf: i16x16 = _mm256_shrdi_epi16::<IMM8>(a, b).as_i16x16(); |
transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
| 2136 | } |
| 2137 | } |
| 2138 | |
| 2139 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 2140 | /// |
| 2141 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi16&expand=5092) |
| 2142 | #[inline ] |
| 2143 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 2144 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 2145 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
| 2146 | #[rustc_legacy_const_generics (3)] |
| 2147 | pub fn _mm256_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
| 2148 | unsafe { |
| 2149 | static_assert_uimm_bits!(IMM8, 8); |
| 2150 | let shf: i16x16 = _mm256_shrdi_epi16::<IMM8>(a, b).as_i16x16(); |
transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
| 2152 | } |
| 2153 | } |
| 2154 | |
| 2155 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst. |
| 2156 | /// |
| 2157 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi16&expand=5090) |
| 2158 | #[inline ] |
| 2159 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 2160 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 2161 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
| 2162 | #[rustc_legacy_const_generics (2)] |
| 2163 | pub fn _mm_shrdi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
| 2164 | static_assert_uimm_bits!(IMM8, 8); |
_mm_shrdv_epi16(a, b, _mm_set1_epi16(IMM8 as i16))
| 2166 | } |
| 2167 | |
| 2168 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| 2169 | /// |
| 2170 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi16&expand=5088) |
| 2171 | #[inline ] |
| 2172 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 2173 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 2174 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
| 2175 | #[rustc_legacy_const_generics (4)] |
| 2176 | pub fn _mm_mask_shrdi_epi16<const IMM8: i32>( |
| 2177 | src: __m128i, |
| 2178 | k: __mmask8, |
| 2179 | a: __m128i, |
| 2180 | b: __m128i, |
| 2181 | ) -> __m128i { |
| 2182 | unsafe { |
| 2183 | static_assert_uimm_bits!(IMM8, 8); |
| 2184 | let shf: i16x8 = _mm_shrdi_epi16::<IMM8>(a, b).as_i16x8(); |
transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
| 2186 | } |
| 2187 | } |
| 2188 | |
| 2189 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| 2190 | /// |
| 2191 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi16&expand=5089) |
| 2192 | #[inline ] |
| 2193 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
| 2194 | #[stable (feature = "stdarch_x86_avx512" , since = "1.89" )] |
| 2195 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
| 2196 | #[rustc_legacy_const_generics (3)] |
| 2197 | pub fn _mm_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
| 2198 | unsafe { |
| 2199 | static_assert_uimm_bits!(IMM8, 8); |
| 2200 | let shf: i16x8 = _mm_shrdi_epi16::<IMM8>(a, b).as_i16x8(); |
transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
| 2202 | } |
| 2203 | } |
| 2204 | |
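// Raw LLVM intrinsic declarations backing the compress, compress-store,
// expand, and expand-load intrinsics in this module.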
| 2205 | #[allow (improper_ctypes)] |
| 2206 | unsafe extern "C" { |
| 2207 | #[link_name = "llvm.x86.avx512.mask.compress.store.w.512" ] |
| 2208 | unsafefn vcompressstorew(mem: *mut i8, data: i16x32, mask: u32); |
| 2209 | #[link_name = "llvm.x86.avx512.mask.compress.store.w.256" ] |
| 2210 | unsafefn vcompressstorew256(mem: *mut i8, data: i16x16, mask: u16); |
| 2211 | #[link_name = "llvm.x86.avx512.mask.compress.store.w.128" ] |
| 2212 | unsafefn vcompressstorew128(mem: *mut i8, data: i16x8, mask: u8); |
| 2213 | |
| 2214 | #[link_name = "llvm.x86.avx512.mask.compress.store.b.512" ] |
| 2215 | unsafefn vcompressstoreb(mem: *mut i8, data: i8x64, mask: u64); |
| 2216 | #[link_name = "llvm.x86.avx512.mask.compress.store.b.256" ] |
| 2217 | unsafefn vcompressstoreb256(mem: *mut i8, data: i8x32, mask: u32); |
| 2218 | #[link_name = "llvm.x86.avx512.mask.compress.store.b.128" ] |
| 2219 | unsafefn vcompressstoreb128(mem: *mut i8, data: i8x16, mask: u16); |
| 2220 | |
| 2221 | #[link_name = "llvm.x86.avx512.mask.compress.w.512" ] |
| 2222 | unsafefn vpcompressw(a: i16x32, src: i16x32, mask: u32) -> i16x32; |
| 2223 | #[link_name = "llvm.x86.avx512.mask.compress.w.256" ] |
| 2224 | unsafefn vpcompressw256(a: i16x16, src: i16x16, mask: u16) -> i16x16; |
| 2225 | #[link_name = "llvm.x86.avx512.mask.compress.w.128" ] |
| 2226 | unsafefn vpcompressw128(a: i16x8, src: i16x8, mask: u8) -> i16x8; |
| 2227 | |
| 2228 | #[link_name = "llvm.x86.avx512.mask.compress.b.512" ] |
| 2229 | unsafefn vpcompressb(a: i8x64, src: i8x64, mask: u64) -> i8x64; |
| 2230 | #[link_name = "llvm.x86.avx512.mask.compress.b.256" ] |
| 2231 | unsafefn vpcompressb256(a: i8x32, src: i8x32, mask: u32) -> i8x32; |
| 2232 | #[link_name = "llvm.x86.avx512.mask.compress.b.128" ] |
| 2233 | unsafefn vpcompressb128(a: i8x16, src: i8x16, mask: u16) -> i8x16; |
| 2234 | |
| 2235 | #[link_name = "llvm.x86.avx512.mask.expand.w.512" ] |
| 2236 | unsafefn vpexpandw(a: i16x32, src: i16x32, mask: u32) -> i16x32; |
| 2237 | #[link_name = "llvm.x86.avx512.mask.expand.w.256" ] |
| 2238 | unsafefn vpexpandw256(a: i16x16, src: i16x16, mask: u16) -> i16x16; |
| 2239 | #[link_name = "llvm.x86.avx512.mask.expand.w.128" ] |
| 2240 | unsafefn vpexpandw128(a: i16x8, src: i16x8, mask: u8) -> i16x8; |
| 2241 | |
| 2242 | #[link_name = "llvm.x86.avx512.mask.expand.b.512" ] |
| 2243 | unsafefn vpexpandb(a: i8x64, src: i8x64, mask: u64) -> i8x64; |
| 2244 | #[link_name = "llvm.x86.avx512.mask.expand.b.256" ] |
| 2245 | unsafefn vpexpandb256(a: i8x32, src: i8x32, mask: u32) -> i8x32; |
| 2246 | #[link_name = "llvm.x86.avx512.mask.expand.b.128" ] |
| 2247 | unsafefn vpexpandb128(a: i8x16, src: i8x16, mask: u16) -> i8x16; |
| 2248 | |
| 2249 | #[link_name = "llvm.x86.avx512.mask.expand.load.b.128" ] |
| 2250 | unsafefn expandloadb_128(mem_addr: *const i8, a: i8x16, mask: u16) -> i8x16; |
| 2251 | #[link_name = "llvm.x86.avx512.mask.expand.load.w.128" ] |
| 2252 | unsafefn expandloadw_128(mem_addr: *const i16, a: i16x8, mask: u8) -> i16x8; |
| 2253 | #[link_name = "llvm.x86.avx512.mask.expand.load.b.256" ] |
| 2254 | unsafefn expandloadb_256(mem_addr: *const i8, a: i8x32, mask: u32) -> i8x32; |
| 2255 | #[link_name = "llvm.x86.avx512.mask.expand.load.w.256" ] |
| 2256 | unsafefn expandloadw_256(mem_addr: *const i16, a: i16x16, mask: u16) -> i16x16; |
| 2257 | #[link_name = "llvm.x86.avx512.mask.expand.load.b.512" ] |
| 2258 | unsafefn expandloadb_512(mem_addr: *const i8, a: i8x64, mask: u64) -> i8x64; |
| 2259 | #[link_name = "llvm.x86.avx512.mask.expand.load.w.512" ] |
| 2260 | unsafefn expandloadw_512(mem_addr: *const i16, a: i16x32, mask: u32) -> i16x32; |
| 2261 | } |
| 2262 | |
| 2263 | #[cfg (test)] |
| 2264 | mod tests { |
| 2265 | |
| 2266 | use stdarch_test::simd_test; |
| 2267 | |
| 2268 | use crate::core_arch::x86::*; |
| 2269 | use crate::hint::black_box; |
| 2270 | |
| 2271 | #[simd_test(enable = "avx512vbmi2" )] |
| 2272 | unsafe fn test_mm512_mask_compress_epi16() { |
| 2273 | let src = _mm512_set1_epi16(200); |
| 2274 | #[rustfmt::skip] |
| 2275 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
| 2276 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
| 2277 | let r = _mm512_mask_compress_epi16(src, 0b01010101_01010101_01010101_01010101, a); |
| 2278 | #[rustfmt::skip] |
| 2279 | let e = _mm512_set_epi16( |
| 2280 | 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, |
| 2281 | 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, |
| 2282 | ); |
| 2283 | assert_eq_m512i(r, e); |
| 2284 | } |
| 2285 | |
| 2286 | #[simd_test(enable = "avx512vbmi2" )] |
| 2287 | unsafe fn test_mm512_maskz_compress_epi16() { |
| 2288 | #[rustfmt::skip] |
| 2289 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
| 2290 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
| 2291 | let r = _mm512_maskz_compress_epi16(0b01010101_01010101_01010101_01010101, a); |
| 2292 | #[rustfmt::skip] |
| 2293 | let e = _mm512_set_epi16( |
| 2294 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 2295 | 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, |
| 2296 | ); |
| 2297 | assert_eq_m512i(r, e); |
| 2298 | } |
| 2299 | |
| 2300 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2301 | unsafe fn test_mm256_mask_compress_epi16() { |
| 2302 | let src = _mm256_set1_epi16(200); |
| 2303 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
| 2304 | let r = _mm256_mask_compress_epi16(src, 0b01010101_01010101, a); |
| 2305 | let e = _mm256_set_epi16( |
| 2306 | 200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15, |
| 2307 | ); |
| 2308 | assert_eq_m256i(r, e); |
| 2309 | } |
| 2310 | |
| 2311 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2312 | unsafe fn test_mm256_maskz_compress_epi16() { |
| 2313 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
| 2314 | let r = _mm256_maskz_compress_epi16(0b01010101_01010101, a); |
| 2315 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15); |
| 2316 | assert_eq_m256i(r, e); |
| 2317 | } |
| 2318 | |
| 2319 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2320 | unsafe fn test_mm_mask_compress_epi16() { |
| 2321 | let src = _mm_set1_epi16(200); |
| 2322 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
| 2323 | let r = _mm_mask_compress_epi16(src, 0b01010101, a); |
| 2324 | let e = _mm_set_epi16(200, 200, 200, 200, 1, 3, 5, 7); |
| 2325 | assert_eq_m128i(r, e); |
| 2326 | } |
| 2327 | |
| 2328 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2329 | unsafe fn test_mm_maskz_compress_epi16() { |
| 2330 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
| 2331 | let r = _mm_maskz_compress_epi16(0b01010101, a); |
| 2332 | let e = _mm_set_epi16(0, 0, 0, 0, 1, 3, 5, 7); |
| 2333 | assert_eq_m128i(r, e); |
| 2334 | } |
| 2335 | |
| 2336 | #[simd_test(enable = "avx512vbmi2" )] |
| 2337 | unsafe fn test_mm512_mask_compress_epi8() { |
| 2338 | let src = _mm512_set1_epi8(100); |
| 2339 | #[rustfmt::skip] |
| 2340 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
| 2341 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
| 2342 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
| 2343 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
| 2344 | let r = _mm512_mask_compress_epi8( |
| 2345 | src, |
| 2346 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101, |
| 2347 | a, |
| 2348 | ); |
| 2349 | #[rustfmt::skip] |
| 2350 | let e = _mm512_set_epi8( |
| 2351 | 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, |
| 2352 | 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, |
| 2353 | 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, |
| 2354 | 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, |
| 2355 | ); |
| 2356 | assert_eq_m512i(r, e); |
| 2357 | } |
| 2358 | |
| 2359 | #[simd_test(enable = "avx512vbmi2" )] |
| 2360 | unsafe fn test_mm512_maskz_compress_epi8() { |
| 2361 | #[rustfmt::skip] |
| 2362 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
| 2363 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
| 2364 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
| 2365 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
| 2366 | let r = _mm512_maskz_compress_epi8( |
| 2367 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101, |
| 2368 | a, |
| 2369 | ); |
| 2370 | #[rustfmt::skip] |
| 2371 | let e = _mm512_set_epi8( |
| 2372 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 2373 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 2374 | 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, |
| 2375 | 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, |
| 2376 | ); |
| 2377 | assert_eq_m512i(r, e); |
| 2378 | } |
| 2379 | |
| 2380 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2381 | unsafe fn test_mm256_mask_compress_epi8() { |
| 2382 | let src = _mm256_set1_epi8(100); |
| 2383 | #[rustfmt::skip] |
| 2384 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
| 2385 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
| 2386 | let r = _mm256_mask_compress_epi8(src, 0b01010101_01010101_01010101_01010101, a); |
| 2387 | #[rustfmt::skip] |
| 2388 | let e = _mm256_set_epi8( |
| 2389 | 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, |
| 2390 | 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, |
| 2391 | ); |
| 2392 | assert_eq_m256i(r, e); |
| 2393 | } |
| 2394 | |
| 2395 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2396 | unsafe fn test_mm256_maskz_compress_epi8() { |
| 2397 | #[rustfmt::skip] |
| 2398 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
| 2399 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
| 2400 | let r = _mm256_maskz_compress_epi8(0b01010101_01010101_01010101_01010101, a); |
| 2401 | #[rustfmt::skip] |
| 2402 | let e = _mm256_set_epi8( |
| 2403 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 2404 | 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, |
| 2405 | ); |
| 2406 | assert_eq_m256i(r, e); |
| 2407 | } |
| 2408 | |
| 2409 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2410 | unsafe fn test_mm_mask_compress_epi8() { |
| 2411 | let src = _mm_set1_epi8(100); |
| 2412 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
| 2413 | let r = _mm_mask_compress_epi8(src, 0b01010101_01010101, a); |
| 2414 | let e = _mm_set_epi8( |
| 2415 | 100, 100, 100, 100, 100, 100, 100, 100, 1, 3, 5, 7, 9, 11, 13, 15, |
| 2416 | ); |
| 2417 | assert_eq_m128i(r, e); |
| 2418 | } |
| 2419 | |
| 2420 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2421 | unsafe fn test_mm_maskz_compress_epi8() { |
| 2422 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
| 2423 | let r = _mm_maskz_compress_epi8(0b01010101_01010101, a); |
| 2424 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15); |
| 2425 | assert_eq_m128i(r, e); |
| 2426 | } |
| 2427 | |
| 2428 | #[simd_test(enable = "avx512vbmi2" )] |
| 2429 | unsafe fn test_mm512_mask_expand_epi16() { |
| 2430 | let src = _mm512_set1_epi16(200); |
| 2431 | #[rustfmt::skip] |
| 2432 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
| 2433 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
| 2434 | let r = _mm512_mask_expand_epi16(src, 0b01010101_01010101_01010101_01010101, a); |
| 2435 | #[rustfmt::skip] |
| 2436 | let e = _mm512_set_epi16( |
| 2437 | 200, 16, 200, 17, 200, 18, 200, 19, 200, 20, 200, 21, 200, 22, 200, 23, |
| 2438 | 200, 24, 200, 25, 200, 26, 200, 27, 200, 28, 200, 29, 200, 30, 200, 31, |
| 2439 | ); |
| 2440 | assert_eq_m512i(r, e); |
| 2441 | } |
| 2442 | |
| 2443 | #[simd_test(enable = "avx512vbmi2" )] |
| 2444 | unsafe fn test_mm512_maskz_expand_epi16() { |
| 2445 | #[rustfmt::skip] |
| 2446 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
| 2447 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
| 2448 | let r = _mm512_maskz_expand_epi16(0b01010101_01010101_01010101_01010101, a); |
| 2449 | #[rustfmt::skip] |
| 2450 | let e = _mm512_set_epi16(0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23, |
| 2451 | 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31); |
| 2452 | assert_eq_m512i(r, e); |
| 2453 | } |
| 2454 | |
| 2455 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2456 | unsafe fn test_mm256_mask_expand_epi16() { |
| 2457 | let src = _mm256_set1_epi16(200); |
| 2458 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
| 2459 | let r = _mm256_mask_expand_epi16(src, 0b01010101_01010101, a); |
| 2460 | let e = _mm256_set_epi16( |
| 2461 | 200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15, |
| 2462 | ); |
| 2463 | assert_eq_m256i(r, e); |
| 2464 | } |
| 2465 | |
| 2466 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2467 | unsafe fn test_mm256_maskz_expand_epi16() { |
| 2468 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
| 2469 | let r = _mm256_maskz_expand_epi16(0b01010101_01010101, a); |
| 2470 | let e = _mm256_set_epi16(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15); |
| 2471 | assert_eq_m256i(r, e); |
| 2472 | } |
| 2473 | |
| 2474 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2475 | unsafe fn test_mm_mask_expand_epi16() { |
| 2476 | let src = _mm_set1_epi16(200); |
| 2477 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
| 2478 | let r = _mm_mask_expand_epi16(src, 0b01010101, a); |
| 2479 | let e = _mm_set_epi16(200, 4, 200, 5, 200, 6, 200, 7); |
| 2480 | assert_eq_m128i(r, e); |
| 2481 | } |
| 2482 | |
| 2483 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2484 | unsafe fn test_mm_maskz_expand_epi16() { |
| 2485 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
| 2486 | let r = _mm_maskz_expand_epi16(0b01010101, a); |
| 2487 | let e = _mm_set_epi16(0, 4, 0, 5, 0, 6, 0, 7); |
| 2488 | assert_eq_m128i(r, e); |
| 2489 | } |
| 2490 | |
| 2491 | #[simd_test(enable = "avx512vbmi2" )] |
| 2492 | unsafe fn test_mm512_mask_expand_epi8() { |
| 2493 | let src = _mm512_set1_epi8(100); |
| 2494 | #[rustfmt::skip] |
| 2495 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
| 2496 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
| 2497 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
| 2498 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
| 2499 | let r = _mm512_mask_expand_epi8( |
| 2500 | src, |
| 2501 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101, |
| 2502 | a, |
| 2503 | ); |
| 2504 | #[rustfmt::skip] |
| 2505 | let e = _mm512_set_epi8( |
| 2506 | 100, 32, 100, 33, 100, 34, 100, 35, 100, 36, 100, 37, 100, 38, 100, 39, |
| 2507 | 100, 40, 100, 41, 100, 42, 100, 43, 100, 44, 100, 45, 100, 46, 100, 47, |
| 2508 | 100, 48, 100, 49, 100, 50, 100, 51, 100, 52, 100, 53, 100, 54, 100, 55, |
| 2509 | 100, 56, 100, 57, 100, 58, 100, 59, 100, 60, 100, 61, 100, 62, 100, 63, |
| 2510 | ); |
| 2511 | assert_eq_m512i(r, e); |
| 2512 | } |
| 2513 | |
| 2514 | #[simd_test(enable = "avx512vbmi2" )] |
| 2515 | unsafe fn test_mm512_maskz_expand_epi8() { |
| 2516 | #[rustfmt::skip] |
| 2517 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
| 2518 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
| 2519 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
| 2520 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
| 2521 | let r = _mm512_maskz_expand_epi8( |
| 2522 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101, |
| 2523 | a, |
| 2524 | ); |
| 2525 | #[rustfmt::skip] |
| 2526 | let e = _mm512_set_epi8( |
| 2527 | 0, 32, 0, 33, 0, 34, 0, 35, 0, 36, 0, 37, 0, 38, 0, 39, |
| 2528 | 0, 40, 0, 41, 0, 42, 0, 43, 0, 44, 0, 45, 0, 46, 0, 47, |
| 2529 | 0, 48, 0, 49, 0, 50, 0, 51, 0, 52, 0, 53, 0, 54, 0, 55, |
| 2530 | 0, 56, 0, 57, 0, 58, 0, 59, 0, 60, 0, 61, 0, 62, 0, 63, |
| 2531 | ); |
| 2532 | assert_eq_m512i(r, e); |
| 2533 | } |
| 2534 | |
| 2535 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2536 | unsafe fn test_mm256_mask_expand_epi8() { |
| 2537 | let src = _mm256_set1_epi8(100); |
| 2538 | #[rustfmt::skip] |
| 2539 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
| 2540 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
| 2541 | let r = _mm256_mask_expand_epi8(src, 0b01010101_01010101_01010101_01010101, a); |
| 2542 | #[rustfmt::skip] |
| 2543 | let e = _mm256_set_epi8( |
| 2544 | 100, 16, 100, 17, 100, 18, 100, 19, 100, 20, 100, 21, 100, 22, 100, 23, |
| 2545 | 100, 24, 100, 25, 100, 26, 100, 27, 100, 28, 100, 29, 100, 30, 100, 31, |
| 2546 | ); |
| 2547 | assert_eq_m256i(r, e); |
| 2548 | } |
| 2549 | |
| 2550 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2551 | unsafe fn test_mm256_maskz_expand_epi8() { |
| 2552 | #[rustfmt::skip] |
| 2553 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
| 2554 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
| 2555 | let r = _mm256_maskz_expand_epi8(0b01010101_01010101_01010101_01010101, a); |
| 2556 | #[rustfmt::skip] |
| 2557 | let e = _mm256_set_epi8( |
| 2558 | 0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23, |
| 2559 | 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31, |
| 2560 | ); |
| 2561 | assert_eq_m256i(r, e); |
| 2562 | } |
| 2563 | |
| 2564 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2565 | unsafe fn test_mm_mask_expand_epi8() { |
| 2566 | let src = _mm_set1_epi8(100); |
| 2567 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
| 2568 | let r = _mm_mask_expand_epi8(src, 0b01010101_01010101, a); |
| 2569 | let e = _mm_set_epi8( |
| 2570 | 100, 8, 100, 9, 100, 10, 100, 11, 100, 12, 100, 13, 100, 14, 100, 15, |
| 2571 | ); |
| 2572 | assert_eq_m128i(r, e); |
| 2573 | } |
| 2574 | |
| 2575 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2576 | unsafe fn test_mm_maskz_expand_epi8() { |
| 2577 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
| 2578 | let r = _mm_maskz_expand_epi8(0b01010101_01010101, a); |
| 2579 | let e = _mm_set_epi8(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15); |
| 2580 | assert_eq_m128i(r, e); |
| 2581 | } |
| 2582 | |
| 2583 | #[simd_test(enable = "avx512vbmi2" )] |
| 2584 | unsafe fn test_mm512_shldv_epi64() { |
| 2585 | let a = _mm512_set1_epi64(1); |
| 2586 | let b = _mm512_set1_epi64(1 << 63); |
| 2587 | let c = _mm512_set1_epi64(2); |
| 2588 | let r = _mm512_shldv_epi64(a, b, c); |
| 2589 | let e = _mm512_set1_epi64(6); |
| 2590 | assert_eq_m512i(r, e); |
| 2591 | } |
| 2592 | |
| 2593 | #[simd_test(enable = "avx512vbmi2" )] |
| 2594 | unsafe fn test_mm512_mask_shldv_epi64() { |
| 2595 | let a = _mm512_set1_epi64(1); |
| 2596 | let b = _mm512_set1_epi64(1 << 63); |
| 2597 | let c = _mm512_set1_epi64(2); |
| 2598 | let r = _mm512_mask_shldv_epi64(a, 0, b, c); |
| 2599 | assert_eq_m512i(r, a); |
| 2600 | let r = _mm512_mask_shldv_epi64(a, 0b11111111, b, c); |
| 2601 | let e = _mm512_set1_epi64(6); |
| 2602 | assert_eq_m512i(r, e); |
| 2603 | } |
| 2604 | |
| 2605 | #[simd_test(enable = "avx512vbmi2" )] |
| 2606 | unsafe fn test_mm512_maskz_shldv_epi64() { |
| 2607 | let a = _mm512_set1_epi64(1); |
| 2608 | let b = _mm512_set1_epi64(1 << 63); |
| 2609 | let c = _mm512_set1_epi64(2); |
| 2610 | let r = _mm512_maskz_shldv_epi64(0, a, b, c); |
| 2611 | assert_eq_m512i(r, _mm512_setzero_si512()); |
| 2612 | let r = _mm512_maskz_shldv_epi64(0b11111111, a, b, c); |
| 2613 | let e = _mm512_set1_epi64(6); |
| 2614 | assert_eq_m512i(r, e); |
| 2615 | } |
| 2616 | |
| 2617 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2618 | unsafe fn test_mm256_shldv_epi64() { |
| 2619 | let a = _mm256_set1_epi64x(1); |
| 2620 | let b = _mm256_set1_epi64x(1 << 63); |
| 2621 | let c = _mm256_set1_epi64x(2); |
| 2622 | let r = _mm256_shldv_epi64(a, b, c); |
| 2623 | let e = _mm256_set1_epi64x(6); |
| 2624 | assert_eq_m256i(r, e); |
| 2625 | } |
| 2626 | |
| 2627 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2628 | unsafe fn test_mm256_mask_shldv_epi64() { |
| 2629 | let a = _mm256_set1_epi64x(1); |
| 2630 | let b = _mm256_set1_epi64x(1 << 63); |
| 2631 | let c = _mm256_set1_epi64x(2); |
| 2632 | let r = _mm256_mask_shldv_epi64(a, 0, b, c); |
| 2633 | assert_eq_m256i(r, a); |
| 2634 | let r = _mm256_mask_shldv_epi64(a, 0b00001111, b, c); |
| 2635 | let e = _mm256_set1_epi64x(6); |
| 2636 | assert_eq_m256i(r, e); |
| 2637 | } |
| 2638 | |
| 2639 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2640 | unsafe fn test_mm256_maskz_shldv_epi64() { |
| 2641 | let a = _mm256_set1_epi64x(1); |
| 2642 | let b = _mm256_set1_epi64x(1 << 63); |
| 2643 | let c = _mm256_set1_epi64x(2); |
| 2644 | let r = _mm256_maskz_shldv_epi64(0, a, b, c); |
| 2645 | assert_eq_m256i(r, _mm256_setzero_si256()); |
| 2646 | let r = _mm256_maskz_shldv_epi64(0b00001111, a, b, c); |
| 2647 | let e = _mm256_set1_epi64x(6); |
| 2648 | assert_eq_m256i(r, e); |
| 2649 | } |
| 2650 | |
| 2651 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2652 | unsafe fn test_mm_shldv_epi64() { |
| 2653 | let a = _mm_set1_epi64x(1); |
| 2654 | let b = _mm_set1_epi64x(1 << 63); |
| 2655 | let c = _mm_set1_epi64x(2); |
| 2656 | let r = _mm_shldv_epi64(a, b, c); |
| 2657 | let e = _mm_set1_epi64x(6); |
| 2658 | assert_eq_m128i(r, e); |
| 2659 | } |
| 2660 | |
| 2661 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2662 | unsafe fn test_mm_mask_shldv_epi64() { |
| 2663 | let a = _mm_set1_epi64x(1); |
| 2664 | let b = _mm_set1_epi64x(1 << 63); |
| 2665 | let c = _mm_set1_epi64x(2); |
| 2666 | let r = _mm_mask_shldv_epi64(a, 0, b, c); |
| 2667 | assert_eq_m128i(r, a); |
| 2668 | let r = _mm_mask_shldv_epi64(a, 0b00000011, b, c); |
| 2669 | let e = _mm_set1_epi64x(6); |
| 2670 | assert_eq_m128i(r, e); |
| 2671 | } |
| 2672 | |
| 2673 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2674 | unsafe fn test_mm_maskz_shldv_epi64() { |
| 2675 | let a = _mm_set1_epi64x(1); |
| 2676 | let b = _mm_set1_epi64x(1 << 63); |
| 2677 | let c = _mm_set1_epi64x(2); |
| 2678 | let r = _mm_maskz_shldv_epi64(0, a, b, c); |
| 2679 | assert_eq_m128i(r, _mm_setzero_si128()); |
| 2680 | let r = _mm_maskz_shldv_epi64(0b00000011, a, b, c); |
| 2681 | let e = _mm_set1_epi64x(6); |
| 2682 | assert_eq_m128i(r, e); |
| 2683 | } |
| 2684 | |
| 2685 | #[simd_test(enable = "avx512vbmi2" )] |
| 2686 | unsafe fn test_mm512_shldv_epi32() { |
| 2687 | let a = _mm512_set1_epi32(1); |
| 2688 | let b = _mm512_set1_epi32(1 << 31); |
| 2689 | let c = _mm512_set1_epi32(2); |
| 2690 | let r = _mm512_shldv_epi32(a, b, c); |
| 2691 | let e = _mm512_set1_epi32(6); |
| 2692 | assert_eq_m512i(r, e); |
| 2693 | } |
| 2694 | |
| 2695 | #[simd_test(enable = "avx512vbmi2" )] |
| 2696 | unsafe fn test_mm512_mask_shldv_epi32() { |
| 2697 | let a = _mm512_set1_epi32(1); |
| 2698 | let b = _mm512_set1_epi32(1 << 31); |
| 2699 | let c = _mm512_set1_epi32(2); |
| 2700 | let r = _mm512_mask_shldv_epi32(a, 0, b, c); |
| 2701 | assert_eq_m512i(r, a); |
| 2702 | let r = _mm512_mask_shldv_epi32(a, 0b11111111_11111111, b, c); |
| 2703 | let e = _mm512_set1_epi32(6); |
| 2704 | assert_eq_m512i(r, e); |
| 2705 | } |
| 2706 | |
| 2707 | #[simd_test(enable = "avx512vbmi2" )] |
| 2708 | unsafe fn test_mm512_maskz_shldv_epi32() { |
| 2709 | let a = _mm512_set1_epi32(1); |
| 2710 | let b = _mm512_set1_epi32(1 << 31); |
| 2711 | let c = _mm512_set1_epi32(2); |
| 2712 | let r = _mm512_maskz_shldv_epi32(0, a, b, c); |
| 2713 | assert_eq_m512i(r, _mm512_setzero_si512()); |
| 2714 | let r = _mm512_maskz_shldv_epi32(0b11111111_11111111, a, b, c); |
| 2715 | let e = _mm512_set1_epi32(6); |
| 2716 | assert_eq_m512i(r, e); |
| 2717 | } |
| 2718 | |
| 2719 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2720 | unsafe fn test_mm256_shldv_epi32() { |
| 2721 | let a = _mm256_set1_epi32(1); |
| 2722 | let b = _mm256_set1_epi32(1 << 31); |
| 2723 | let c = _mm256_set1_epi32(2); |
| 2724 | let r = _mm256_shldv_epi32(a, b, c); |
| 2725 | let e = _mm256_set1_epi32(6); |
| 2726 | assert_eq_m256i(r, e); |
| 2727 | } |
| 2728 | |
| 2729 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2730 | unsafe fn test_mm256_mask_shldv_epi32() { |
| 2731 | let a = _mm256_set1_epi32(1); |
| 2732 | let b = _mm256_set1_epi32(1 << 31); |
| 2733 | let c = _mm256_set1_epi32(2); |
| 2734 | let r = _mm256_mask_shldv_epi32(a, 0, b, c); |
| 2735 | assert_eq_m256i(r, a); |
| 2736 | let r = _mm256_mask_shldv_epi32(a, 0b11111111, b, c); |
| 2737 | let e = _mm256_set1_epi32(6); |
| 2738 | assert_eq_m256i(r, e); |
| 2739 | } |
| 2740 | |
| 2741 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2742 | unsafe fn test_mm256_maskz_shldv_epi32() { |
| 2743 | let a = _mm256_set1_epi32(1); |
| 2744 | let b = _mm256_set1_epi32(1 << 31); |
| 2745 | let c = _mm256_set1_epi32(2); |
| 2746 | let r = _mm256_maskz_shldv_epi32(0, a, b, c); |
| 2747 | assert_eq_m256i(r, _mm256_setzero_si256()); |
| 2748 | let r = _mm256_maskz_shldv_epi32(0b11111111, a, b, c); |
| 2749 | let e = _mm256_set1_epi32(6); |
| 2750 | assert_eq_m256i(r, e); |
| 2751 | } |
| 2752 | |
| 2753 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2754 | unsafe fn test_mm_shldv_epi32() { |
| 2755 | let a = _mm_set1_epi32(1); |
| 2756 | let b = _mm_set1_epi32(1 << 31); |
| 2757 | let c = _mm_set1_epi32(2); |
| 2758 | let r = _mm_shldv_epi32(a, b, c); |
| 2759 | let e = _mm_set1_epi32(6); |
| 2760 | assert_eq_m128i(r, e); |
| 2761 | } |
| 2762 | |
| 2763 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2764 | unsafe fn test_mm_mask_shldv_epi32() { |
| 2765 | let a = _mm_set1_epi32(1); |
| 2766 | let b = _mm_set1_epi32(1 << 31); |
| 2767 | let c = _mm_set1_epi32(2); |
| 2768 | let r = _mm_mask_shldv_epi32(a, 0, b, c); |
| 2769 | assert_eq_m128i(r, a); |
| 2770 | let r = _mm_mask_shldv_epi32(a, 0b00001111, b, c); |
| 2771 | let e = _mm_set1_epi32(6); |
| 2772 | assert_eq_m128i(r, e); |
| 2773 | } |
| 2774 | |
| 2775 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2776 | unsafe fn test_mm_maskz_shldv_epi32() { |
| 2777 | let a = _mm_set1_epi32(1); |
| 2778 | let b = _mm_set1_epi32(1 << 31); |
| 2779 | let c = _mm_set1_epi32(2); |
| 2780 | let r = _mm_maskz_shldv_epi32(0, a, b, c); |
| 2781 | assert_eq_m128i(r, _mm_setzero_si128()); |
| 2782 | let r = _mm_maskz_shldv_epi32(0b00001111, a, b, c); |
| 2783 | let e = _mm_set1_epi32(6); |
| 2784 | assert_eq_m128i(r, e); |
| 2785 | } |
| 2786 | |
| 2787 | #[simd_test(enable = "avx512vbmi2" )] |
| 2788 | unsafe fn test_mm512_shldv_epi16() { |
| 2789 | let a = _mm512_set1_epi16(1); |
| 2790 | let b = _mm512_set1_epi16(1 << 15); |
| 2791 | let c = _mm512_set1_epi16(2); |
| 2792 | let r = _mm512_shldv_epi16(a, b, c); |
| 2793 | let e = _mm512_set1_epi16(6); |
| 2794 | assert_eq_m512i(r, e); |
| 2795 | } |
| 2796 | |
| 2797 | #[simd_test(enable = "avx512vbmi2" )] |
| 2798 | unsafe fn test_mm512_mask_shldv_epi16() { |
| 2799 | let a = _mm512_set1_epi16(1); |
| 2800 | let b = _mm512_set1_epi16(1 << 15); |
| 2801 | let c = _mm512_set1_epi16(2); |
| 2802 | let r = _mm512_mask_shldv_epi16(a, 0, b, c); |
| 2803 | assert_eq_m512i(r, a); |
| 2804 | let r = _mm512_mask_shldv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c); |
| 2805 | let e = _mm512_set1_epi16(6); |
| 2806 | assert_eq_m512i(r, e); |
| 2807 | } |
| 2808 | |
| 2809 | #[simd_test(enable = "avx512vbmi2" )] |
| 2810 | unsafe fn test_mm512_maskz_shldv_epi16() { |
| 2811 | let a = _mm512_set1_epi16(1); |
| 2812 | let b = _mm512_set1_epi16(1 << 15); |
| 2813 | let c = _mm512_set1_epi16(2); |
| 2814 | let r = _mm512_maskz_shldv_epi16(0, a, b, c); |
| 2815 | assert_eq_m512i(r, _mm512_setzero_si512()); |
| 2816 | let r = _mm512_maskz_shldv_epi16(0b11111111_11111111_11111111_11111111, a, b, c); |
| 2817 | let e = _mm512_set1_epi16(6); |
| 2818 | assert_eq_m512i(r, e); |
| 2819 | } |
| 2820 | |
| 2821 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2822 | unsafe fn test_mm256_shldv_epi16() { |
| 2823 | let a = _mm256_set1_epi16(1); |
| 2824 | let b = _mm256_set1_epi16(1 << 15); |
| 2825 | let c = _mm256_set1_epi16(2); |
| 2826 | let r = _mm256_shldv_epi16(a, b, c); |
| 2827 | let e = _mm256_set1_epi16(6); |
| 2828 | assert_eq_m256i(r, e); |
| 2829 | } |
| 2830 | |
| 2831 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2832 | unsafe fn test_mm256_mask_shldv_epi16() { |
| 2833 | let a = _mm256_set1_epi16(1); |
| 2834 | let b = _mm256_set1_epi16(1 << 15); |
| 2835 | let c = _mm256_set1_epi16(2); |
| 2836 | let r = _mm256_mask_shldv_epi16(a, 0, b, c); |
| 2837 | assert_eq_m256i(r, a); |
| 2838 | let r = _mm256_mask_shldv_epi16(a, 0b11111111_11111111, b, c); |
| 2839 | let e = _mm256_set1_epi16(6); |
| 2840 | assert_eq_m256i(r, e); |
| 2841 | } |
| 2842 | |
| 2843 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2844 | unsafe fn test_mm256_maskz_shldv_epi16() { |
| 2845 | let a = _mm256_set1_epi16(1); |
| 2846 | let b = _mm256_set1_epi16(1 << 15); |
| 2847 | let c = _mm256_set1_epi16(2); |
| 2848 | let r = _mm256_maskz_shldv_epi16(0, a, b, c); |
| 2849 | assert_eq_m256i(r, _mm256_setzero_si256()); |
| 2850 | let r = _mm256_maskz_shldv_epi16(0b11111111_11111111, a, b, c); |
| 2851 | let e = _mm256_set1_epi16(6); |
| 2852 | assert_eq_m256i(r, e); |
| 2853 | } |
| 2854 | |
| 2855 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2856 | unsafe fn test_mm_shldv_epi16() { |
| 2857 | let a = _mm_set1_epi16(1); |
| 2858 | let b = _mm_set1_epi16(1 << 15); |
| 2859 | let c = _mm_set1_epi16(2); |
| 2860 | let r = _mm_shldv_epi16(a, b, c); |
| 2861 | let e = _mm_set1_epi16(6); |
| 2862 | assert_eq_m128i(r, e); |
| 2863 | } |
| 2864 | |
| 2865 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2866 | unsafe fn test_mm_mask_shldv_epi16() { |
| 2867 | let a = _mm_set1_epi16(1); |
| 2868 | let b = _mm_set1_epi16(1 << 15); |
| 2869 | let c = _mm_set1_epi16(2); |
| 2870 | let r = _mm_mask_shldv_epi16(a, 0, b, c); |
| 2871 | assert_eq_m128i(r, a); |
| 2872 | let r = _mm_mask_shldv_epi16(a, 0b11111111, b, c); |
| 2873 | let e = _mm_set1_epi16(6); |
| 2874 | assert_eq_m128i(r, e); |
| 2875 | } |
| 2876 | |
| 2877 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2878 | unsafe fn test_mm_maskz_shldv_epi16() { |
| 2879 | let a = _mm_set1_epi16(1); |
| 2880 | let b = _mm_set1_epi16(1 << 15); |
| 2881 | let c = _mm_set1_epi16(2); |
| 2882 | let r = _mm_maskz_shldv_epi16(0, a, b, c); |
| 2883 | assert_eq_m128i(r, _mm_setzero_si128()); |
| 2884 | let r = _mm_maskz_shldv_epi16(0b11111111, a, b, c); |
| 2885 | let e = _mm_set1_epi16(6); |
| 2886 | assert_eq_m128i(r, e); |
| 2887 | } |
| 2888 | |
| 2889 | #[simd_test(enable = "avx512vbmi2" )] |
| 2890 | unsafe fn test_mm512_shrdv_epi64() { |
| 2891 | let a = _mm512_set1_epi64(2); |
| 2892 | let b = _mm512_set1_epi64(8); |
| 2893 | let c = _mm512_set1_epi64(1); |
| 2894 | let r = _mm512_shrdv_epi64(a, b, c); |
| 2895 | let e = _mm512_set1_epi64(1); |
| 2896 | assert_eq_m512i(r, e); |
| 2897 | } |
| 2898 | |
| 2899 | #[simd_test(enable = "avx512vbmi2" )] |
| 2900 | unsafe fn test_mm512_mask_shrdv_epi64() { |
| 2901 | let a = _mm512_set1_epi64(2); |
| 2902 | let b = _mm512_set1_epi64(8); |
| 2903 | let c = _mm512_set1_epi64(1); |
| 2904 | let r = _mm512_mask_shrdv_epi64(a, 0, b, c); |
| 2905 | assert_eq_m512i(r, a); |
| 2906 | let r = _mm512_mask_shrdv_epi64(a, 0b11111111, b, c); |
| 2907 | let e = _mm512_set1_epi64(1); |
| 2908 | assert_eq_m512i(r, e); |
| 2909 | } |
| 2910 | |
| 2911 | #[simd_test(enable = "avx512vbmi2" )] |
| 2912 | unsafe fn test_mm512_maskz_shrdv_epi64() { |
| 2913 | let a = _mm512_set1_epi64(2); |
| 2914 | let b = _mm512_set1_epi64(8); |
| 2915 | let c = _mm512_set1_epi64(1); |
| 2916 | let r = _mm512_maskz_shrdv_epi64(0, a, b, c); |
| 2917 | assert_eq_m512i(r, _mm512_setzero_si512()); |
| 2918 | let r = _mm512_maskz_shrdv_epi64(0b11111111, a, b, c); |
| 2919 | let e = _mm512_set1_epi64(1); |
| 2920 | assert_eq_m512i(r, e); |
| 2921 | } |
| 2922 | |
| 2923 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2924 | unsafe fn test_mm256_shrdv_epi64() { |
| 2925 | let a = _mm256_set1_epi64x(2); |
| 2926 | let b = _mm256_set1_epi64x(8); |
| 2927 | let c = _mm256_set1_epi64x(1); |
| 2928 | let r = _mm256_shrdv_epi64(a, b, c); |
| 2929 | let e = _mm256_set1_epi64x(1); |
| 2930 | assert_eq_m256i(r, e); |
| 2931 | } |
| 2932 | |
| 2933 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2934 | unsafe fn test_mm256_mask_shrdv_epi64() { |
| 2935 | let a = _mm256_set1_epi64x(2); |
| 2936 | let b = _mm256_set1_epi64x(8); |
| 2937 | let c = _mm256_set1_epi64x(1); |
| 2938 | let r = _mm256_mask_shrdv_epi64(a, 0, b, c); |
| 2939 | assert_eq_m256i(r, a); |
| 2940 | let r = _mm256_mask_shrdv_epi64(a, 0b00001111, b, c); |
| 2941 | let e = _mm256_set1_epi64x(1); |
| 2942 | assert_eq_m256i(r, e); |
| 2943 | } |
| 2944 | |
| 2945 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2946 | unsafe fn test_mm256_maskz_shrdv_epi64() { |
| 2947 | let a = _mm256_set1_epi64x(2); |
| 2948 | let b = _mm256_set1_epi64x(8); |
| 2949 | let c = _mm256_set1_epi64x(1); |
| 2950 | let r = _mm256_maskz_shrdv_epi64(0, a, b, c); |
| 2951 | assert_eq_m256i(r, _mm256_setzero_si256()); |
| 2952 | let r = _mm256_maskz_shrdv_epi64(0b00001111, a, b, c); |
| 2953 | let e = _mm256_set1_epi64x(1); |
| 2954 | assert_eq_m256i(r, e); |
| 2955 | } |
| 2956 | |
| 2957 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2958 | unsafe fn test_mm_shrdv_epi64() { |
| 2959 | let a = _mm_set1_epi64x(2); |
| 2960 | let b = _mm_set1_epi64x(8); |
| 2961 | let c = _mm_set1_epi64x(1); |
| 2962 | let r = _mm_shrdv_epi64(a, b, c); |
| 2963 | let e = _mm_set1_epi64x(1); |
| 2964 | assert_eq_m128i(r, e); |
| 2965 | } |
| 2966 | |
| 2967 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2968 | unsafe fn test_mm_mask_shrdv_epi64() { |
| 2969 | let a = _mm_set1_epi64x(2); |
| 2970 | let b = _mm_set1_epi64x(8); |
| 2971 | let c = _mm_set1_epi64x(1); |
| 2972 | let r = _mm_mask_shrdv_epi64(a, 0, b, c); |
| 2973 | assert_eq_m128i(r, a); |
| 2974 | let r = _mm_mask_shrdv_epi64(a, 0b00000011, b, c); |
| 2975 | let e = _mm_set1_epi64x(1); |
| 2976 | assert_eq_m128i(r, e); |
| 2977 | } |
| 2978 | |
| 2979 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 2980 | unsafe fn test_mm_maskz_shrdv_epi64() { |
| 2981 | let a = _mm_set1_epi64x(2); |
| 2982 | let b = _mm_set1_epi64x(8); |
| 2983 | let c = _mm_set1_epi64x(1); |
| 2984 | let r = _mm_maskz_shrdv_epi64(0, a, b, c); |
| 2985 | assert_eq_m128i(r, _mm_setzero_si128()); |
| 2986 | let r = _mm_maskz_shrdv_epi64(0b00000011, a, b, c); |
| 2987 | let e = _mm_set1_epi64x(1); |
| 2988 | assert_eq_m128i(r, e); |
| 2989 | } |
| 2990 | |
| 2991 | #[simd_test(enable = "avx512vbmi2" )] |
| 2992 | unsafe fn test_mm512_shrdv_epi32() { |
| 2993 | let a = _mm512_set1_epi32(2); |
| 2994 | let b = _mm512_set1_epi32(8); |
| 2995 | let c = _mm512_set1_epi32(1); |
| 2996 | let r = _mm512_shrdv_epi32(a, b, c); |
| 2997 | let e = _mm512_set1_epi32(1); |
| 2998 | assert_eq_m512i(r, e); |
| 2999 | } |
| 3000 | |
| 3001 | #[simd_test(enable = "avx512vbmi2" )] |
| 3002 | unsafe fn test_mm512_mask_shrdv_epi32() { |
| 3003 | let a = _mm512_set1_epi32(2); |
| 3004 | let b = _mm512_set1_epi32(8); |
| 3005 | let c = _mm512_set1_epi32(1); |
| 3006 | let r = _mm512_mask_shrdv_epi32(a, 0, b, c); |
| 3007 | assert_eq_m512i(r, a); |
| 3008 | let r = _mm512_mask_shrdv_epi32(a, 0b11111111_11111111, b, c); |
| 3009 | let e = _mm512_set1_epi32(1); |
| 3010 | assert_eq_m512i(r, e); |
| 3011 | } |
| 3012 | |
| 3013 | #[simd_test(enable = "avx512vbmi2" )] |
| 3014 | unsafe fn test_mm512_maskz_shrdv_epi32() { |
| 3015 | let a = _mm512_set1_epi32(2); |
| 3016 | let b = _mm512_set1_epi32(8); |
| 3017 | let c = _mm512_set1_epi32(1); |
| 3018 | let r = _mm512_maskz_shrdv_epi32(0, a, b, c); |
| 3019 | assert_eq_m512i(r, _mm512_setzero_si512()); |
| 3020 | let r = _mm512_maskz_shrdv_epi32(0b11111111_11111111, a, b, c); |
| 3021 | let e = _mm512_set1_epi32(1); |
| 3022 | assert_eq_m512i(r, e); |
| 3023 | } |
| 3024 | |
| 3025 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3026 | unsafe fn test_mm256_shrdv_epi32() { |
| 3027 | let a = _mm256_set1_epi32(2); |
| 3028 | let b = _mm256_set1_epi32(8); |
| 3029 | let c = _mm256_set1_epi32(1); |
| 3030 | let r = _mm256_shrdv_epi32(a, b, c); |
| 3031 | let e = _mm256_set1_epi32(1); |
| 3032 | assert_eq_m256i(r, e); |
| 3033 | } |
| 3034 | |
| 3035 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3036 | unsafe fn test_mm256_mask_shrdv_epi32() { |
| 3037 | let a = _mm256_set1_epi32(2); |
| 3038 | let b = _mm256_set1_epi32(8); |
| 3039 | let c = _mm256_set1_epi32(1); |
| 3040 | let r = _mm256_mask_shrdv_epi32(a, 0, b, c); |
| 3041 | assert_eq_m256i(r, a); |
| 3042 | let r = _mm256_mask_shrdv_epi32(a, 0b11111111, b, c); |
| 3043 | let e = _mm256_set1_epi32(1); |
| 3044 | assert_eq_m256i(r, e); |
| 3045 | } |
| 3046 | |
| 3047 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3048 | unsafe fn test_mm256_maskz_shrdv_epi32() { |
| 3049 | let a = _mm256_set1_epi32(2); |
| 3050 | let b = _mm256_set1_epi32(8); |
| 3051 | let c = _mm256_set1_epi32(1); |
| 3052 | let r = _mm256_maskz_shrdv_epi32(0, a, b, c); |
| 3053 | assert_eq_m256i(r, _mm256_setzero_si256()); |
| 3054 | let r = _mm256_maskz_shrdv_epi32(0b11111111, a, b, c); |
| 3055 | let e = _mm256_set1_epi32(1); |
| 3056 | assert_eq_m256i(r, e); |
| 3057 | } |
| 3058 | |
| 3059 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3060 | unsafe fn test_mm_shrdv_epi32() { |
| 3061 | let a = _mm_set1_epi32(2); |
| 3062 | let b = _mm_set1_epi32(8); |
| 3063 | let c = _mm_set1_epi32(1); |
| 3064 | let r = _mm_shrdv_epi32(a, b, c); |
| 3065 | let e = _mm_set1_epi32(1); |
| 3066 | assert_eq_m128i(r, e); |
| 3067 | } |
| 3068 | |
| 3069 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3070 | unsafe fn test_mm_mask_shrdv_epi32() { |
| 3071 | let a = _mm_set1_epi32(2); |
| 3072 | let b = _mm_set1_epi32(8); |
| 3073 | let c = _mm_set1_epi32(1); |
| 3074 | let r = _mm_mask_shrdv_epi32(a, 0, b, c); |
| 3075 | assert_eq_m128i(r, a); |
| 3076 | let r = _mm_mask_shrdv_epi32(a, 0b00001111, b, c); |
| 3077 | let e = _mm_set1_epi32(1); |
| 3078 | assert_eq_m128i(r, e); |
| 3079 | } |
| 3080 | |
| 3081 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3082 | unsafe fn test_mm_maskz_shrdv_epi32() { |
| 3083 | let a = _mm_set1_epi32(2); |
| 3084 | let b = _mm_set1_epi32(8); |
| 3085 | let c = _mm_set1_epi32(1); |
| 3086 | let r = _mm_maskz_shrdv_epi32(0, a, b, c); |
| 3087 | assert_eq_m128i(r, _mm_setzero_si128()); |
| 3088 | let r = _mm_maskz_shrdv_epi32(0b00001111, a, b, c); |
| 3089 | let e = _mm_set1_epi32(1); |
| 3090 | assert_eq_m128i(r, e); |
| 3091 | } |
| 3092 | |
| 3093 | #[simd_test(enable = "avx512vbmi2" )] |
| 3094 | unsafe fn test_mm512_shrdv_epi16() { |
| 3095 | let a = _mm512_set1_epi16(2); |
| 3096 | let b = _mm512_set1_epi16(8); |
| 3097 | let c = _mm512_set1_epi16(1); |
| 3098 | let r = _mm512_shrdv_epi16(a, b, c); |
| 3099 | let e = _mm512_set1_epi16(1); |
| 3100 | assert_eq_m512i(r, e); |
| 3101 | } |
| 3102 | |
| 3103 | #[simd_test(enable = "avx512vbmi2" )] |
| 3104 | unsafe fn test_mm512_mask_shrdv_epi16() { |
| 3105 | let a = _mm512_set1_epi16(2); |
| 3106 | let b = _mm512_set1_epi16(8); |
| 3107 | let c = _mm512_set1_epi16(1); |
| 3108 | let r = _mm512_mask_shrdv_epi16(a, 0, b, c); |
| 3109 | assert_eq_m512i(r, a); |
| 3110 | let r = _mm512_mask_shrdv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c); |
| 3111 | let e = _mm512_set1_epi16(1); |
| 3112 | assert_eq_m512i(r, e); |
| 3113 | } |
| 3114 | |
| 3115 | #[simd_test(enable = "avx512vbmi2" )] |
| 3116 | unsafe fn test_mm512_maskz_shrdv_epi16() { |
| 3117 | let a = _mm512_set1_epi16(2); |
| 3118 | let b = _mm512_set1_epi16(8); |
| 3119 | let c = _mm512_set1_epi16(1); |
| 3120 | let r = _mm512_maskz_shrdv_epi16(0, a, b, c); |
| 3121 | assert_eq_m512i(r, _mm512_setzero_si512()); |
| 3122 | let r = _mm512_maskz_shrdv_epi16(0b11111111_11111111_11111111_11111111, a, b, c); |
| 3123 | let e = _mm512_set1_epi16(1); |
| 3124 | assert_eq_m512i(r, e); |
| 3125 | } |
| 3126 | |
| 3127 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3128 | unsafe fn test_mm256_shrdv_epi16() { |
| 3129 | let a = _mm256_set1_epi16(2); |
| 3130 | let b = _mm256_set1_epi16(8); |
| 3131 | let c = _mm256_set1_epi16(1); |
| 3132 | let r = _mm256_shrdv_epi16(a, b, c); |
| 3133 | let e = _mm256_set1_epi16(1); |
| 3134 | assert_eq_m256i(r, e); |
| 3135 | } |
| 3136 | |
| 3137 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3138 | unsafe fn test_mm256_mask_shrdv_epi16() { |
| 3139 | let a = _mm256_set1_epi16(2); |
| 3140 | let b = _mm256_set1_epi16(8); |
| 3141 | let c = _mm256_set1_epi16(1); |
| 3142 | let r = _mm256_mask_shrdv_epi16(a, 0, b, c); |
| 3143 | assert_eq_m256i(r, a); |
| 3144 | let r = _mm256_mask_shrdv_epi16(a, 0b11111111_11111111, b, c); |
| 3145 | let e = _mm256_set1_epi16(1); |
| 3146 | assert_eq_m256i(r, e); |
| 3147 | } |
| 3148 | |
| 3149 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3150 | unsafe fn test_mm256_maskz_shrdv_epi16() { |
| 3151 | let a = _mm256_set1_epi16(2); |
| 3152 | let b = _mm256_set1_epi16(8); |
| 3153 | let c = _mm256_set1_epi16(1); |
| 3154 | let r = _mm256_maskz_shrdv_epi16(0, a, b, c); |
| 3155 | assert_eq_m256i(r, _mm256_setzero_si256()); |
| 3156 | let r = _mm256_maskz_shrdv_epi16(0b11111111_11111111, a, b, c); |
| 3157 | let e = _mm256_set1_epi16(1); |
| 3158 | assert_eq_m256i(r, e); |
| 3159 | } |
| 3160 | |
| 3161 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3162 | unsafe fn test_mm_shrdv_epi16() { |
| 3163 | let a = _mm_set1_epi16(2); |
| 3164 | let b = _mm_set1_epi16(8); |
| 3165 | let c = _mm_set1_epi16(1); |
| 3166 | let r = _mm_shrdv_epi16(a, b, c); |
| 3167 | let e = _mm_set1_epi16(1); |
| 3168 | assert_eq_m128i(r, e); |
| 3169 | } |
| 3170 | |
| 3171 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3172 | unsafe fn test_mm_mask_shrdv_epi16() { |
| 3173 | let a = _mm_set1_epi16(2); |
| 3174 | let b = _mm_set1_epi16(8); |
| 3175 | let c = _mm_set1_epi16(1); |
| 3176 | let r = _mm_mask_shrdv_epi16(a, 0, b, c); |
| 3177 | assert_eq_m128i(r, a); |
| 3178 | let r = _mm_mask_shrdv_epi16(a, 0b11111111, b, c); |
| 3179 | let e = _mm_set1_epi16(1); |
| 3180 | assert_eq_m128i(r, e); |
| 3181 | } |
| 3182 | |
| 3183 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3184 | unsafe fn test_mm_maskz_shrdv_epi16() { |
| 3185 | let a = _mm_set1_epi16(2); |
| 3186 | let b = _mm_set1_epi16(8); |
| 3187 | let c = _mm_set1_epi16(1); |
| 3188 | let r = _mm_maskz_shrdv_epi16(0, a, b, c); |
| 3189 | assert_eq_m128i(r, _mm_setzero_si128()); |
| 3190 | let r = _mm_maskz_shrdv_epi16(0b11111111, a, b, c); |
| 3191 | let e = _mm_set1_epi16(1); |
| 3192 | assert_eq_m128i(r, e); |
| 3193 | } |
| 3194 | |
| 3195 | #[simd_test(enable = "avx512vbmi2" )] |
| 3196 | unsafe fn test_mm512_shldi_epi64() { |
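// shldi ("shift left double, immediate"): each lane forms the 128-bit value (a:b),
// shifts it left by the const generic shift count, and keeps the upper 64 bits.
// With a = 1 and b = 1 << 63, shifting the concatenation left by 2 puts 0b110 = 6
// in the upper half of every lane.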
| 3197 | let a = _mm512_set1_epi64(1); |
| 3198 | let b = _mm512_set1_epi64(1 << 63); |
| 3199 | let r = _mm512_shldi_epi64::<2>(a, b); |
| 3200 | let e = _mm512_set1_epi64(6); |
| 3201 | assert_eq_m512i(r, e); |
| 3202 | } |
| 3203 | |
| 3204 | #[simd_test(enable = "avx512vbmi2" )] |
| 3205 | unsafe fn test_mm512_mask_shldi_epi64() { |
| 3206 | let a = _mm512_set1_epi64(1); |
| 3207 | let b = _mm512_set1_epi64(1 << 63); |
| 3208 | let r = _mm512_mask_shldi_epi64::<2>(a, 0, a, b); |
| 3209 | assert_eq_m512i(r, a); |
| 3210 | let r = _mm512_mask_shldi_epi64::<2>(a, 0b11111111, a, b); |
| 3211 | let e = _mm512_set1_epi64(6); |
| 3212 | assert_eq_m512i(r, e); |
| 3213 | } |
| 3214 | |
| 3215 | #[simd_test(enable = "avx512vbmi2" )] |
| 3216 | unsafe fn test_mm512_maskz_shldi_epi64() { |
| 3217 | let a = _mm512_set1_epi64(1); |
| 3218 | let b = _mm512_set1_epi64(1 << 63); |
| 3219 | let r = _mm512_maskz_shldi_epi64::<2>(0, a, b); |
| 3220 | assert_eq_m512i(r, _mm512_setzero_si512()); |
| 3221 | let r = _mm512_maskz_shldi_epi64::<2>(0b11111111, a, b); |
| 3222 | let e = _mm512_set1_epi64(6); |
| 3223 | assert_eq_m512i(r, e); |
| 3224 | } |
| 3225 | |
| 3226 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3227 | unsafe fn test_mm256_shldi_epi64() { |
| 3228 | let a = _mm256_set1_epi64x(1); |
| 3229 | let b = _mm256_set1_epi64x(1 << 63); |
| 3230 | let r = _mm256_shldi_epi64::<2>(a, b); |
| 3231 | let e = _mm256_set1_epi64x(6); |
| 3232 | assert_eq_m256i(r, e); |
| 3233 | } |
| 3234 | |
| 3235 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3236 | unsafe fn test_mm256_mask_shldi_epi64() { |
| 3237 | let a = _mm256_set1_epi64x(1); |
| 3238 | let b = _mm256_set1_epi64x(1 << 63); |
| 3239 | let r = _mm256_mask_shldi_epi64::<2>(a, 0, a, b); |
| 3240 | assert_eq_m256i(r, a); |
| 3241 | let r = _mm256_mask_shldi_epi64::<2>(a, 0b00001111, a, b); |
| 3242 | let e = _mm256_set1_epi64x(6); |
| 3243 | assert_eq_m256i(r, e); |
| 3244 | } |
| 3245 | |
| 3246 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3247 | unsafe fn test_mm256_maskz_shldi_epi64() { |
| 3248 | let a = _mm256_set1_epi64x(1); |
| 3249 | let b = _mm256_set1_epi64x(1 << 63); |
| 3250 | let r = _mm256_maskz_shldi_epi64::<2>(0, a, b); |
| 3251 | assert_eq_m256i(r, _mm256_setzero_si256()); |
| 3252 | let r = _mm256_maskz_shldi_epi64::<2>(0b00001111, a, b); |
| 3253 | let e = _mm256_set1_epi64x(6); |
| 3254 | assert_eq_m256i(r, e); |
| 3255 | } |
| 3256 | |
| 3257 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3258 | unsafe fn test_mm_shldi_epi64() { |
| 3259 | let a = _mm_set1_epi64x(1); |
| 3260 | let b = _mm_set1_epi64x(1 << 63); |
| 3261 | let r = _mm_shldi_epi64::<2>(a, b); |
| 3262 | let e = _mm_set1_epi64x(6); |
| 3263 | assert_eq_m128i(r, e); |
| 3264 | } |
| 3265 | |
| 3266 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3267 | unsafe fn test_mm_mask_shldi_epi64() { |
| 3268 | let a = _mm_set1_epi64x(1); |
| 3269 | let b = _mm_set1_epi64x(1 << 63); |
| 3270 | let r = _mm_mask_shldi_epi64::<2>(a, 0, a, b); |
| 3271 | assert_eq_m128i(r, a); |
| 3272 | let r = _mm_mask_shldi_epi64::<2>(a, 0b00000011, a, b); |
| 3273 | let e = _mm_set1_epi64x(6); |
| 3274 | assert_eq_m128i(r, e); |
| 3275 | } |
| 3276 | |
| 3277 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3278 | unsafe fn test_mm_maskz_shldi_epi64() { |
| 3279 | let a = _mm_set1_epi64x(1); |
| 3280 | let b = _mm_set1_epi64x(1 << 63); |
| 3281 | let r = _mm_maskz_shldi_epi64::<2>(0, a, b); |
| 3282 | assert_eq_m128i(r, _mm_setzero_si128()); |
| 3283 | let r = _mm_maskz_shldi_epi64::<2>(0b00000011, a, b); |
| 3284 | let e = _mm_set1_epi64x(6); |
| 3285 | assert_eq_m128i(r, e); |
| 3286 | } |
| 3287 | |
| 3288 | #[simd_test(enable = "avx512vbmi2" )] |
| 3289 | unsafe fn test_mm512_shldi_epi32() { |
| 3290 | let a = _mm512_set1_epi32(1); |
| 3291 | let b = _mm512_set1_epi32(1 << 31); |
| 3292 | let r = _mm512_shldi_epi32::<2>(a, b); |
| 3293 | let e = _mm512_set1_epi32(6); |
| 3294 | assert_eq_m512i(r, e); |
| 3295 | } |
| 3296 | |
| 3297 | #[simd_test(enable = "avx512vbmi2" )] |
| 3298 | unsafe fn test_mm512_mask_shldi_epi32() { |
| 3299 | let a = _mm512_set1_epi32(1); |
| 3300 | let b = _mm512_set1_epi32(1 << 31); |
| 3301 | let r = _mm512_mask_shldi_epi32::<2>(a, 0, a, b); |
| 3302 | assert_eq_m512i(r, a); |
| 3303 | let r = _mm512_mask_shldi_epi32::<2>(a, 0b11111111_11111111, a, b); |
| 3304 | let e = _mm512_set1_epi32(6); |
| 3305 | assert_eq_m512i(r, e); |
| 3306 | } |
| 3307 | |
| 3308 | #[simd_test(enable = "avx512vbmi2" )] |
| 3309 | unsafe fn test_mm512_maskz_shldi_epi32() { |
| 3310 | let a = _mm512_set1_epi32(1); |
| 3311 | let b = _mm512_set1_epi32(1 << 31); |
| 3312 | let r = _mm512_maskz_shldi_epi32::<2>(0, a, b); |
| 3313 | assert_eq_m512i(r, _mm512_setzero_si512()); |
| 3314 | let r = _mm512_maskz_shldi_epi32::<2>(0b11111111_11111111, a, b); |
| 3315 | let e = _mm512_set1_epi32(6); |
| 3316 | assert_eq_m512i(r, e); |
| 3317 | } |
| 3318 | |
| 3319 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3320 | unsafe fn test_mm256_shldi_epi32() { |
| 3321 | let a = _mm256_set1_epi32(1); |
| 3322 | let b = _mm256_set1_epi32(1 << 31); |
| 3323 | let r = _mm256_shldi_epi32::<2>(a, b); |
| 3324 | let e = _mm256_set1_epi32(6); |
| 3325 | assert_eq_m256i(r, e); |
| 3326 | } |
| 3327 | |
| 3328 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3329 | unsafe fn test_mm256_mask_shldi_epi32() { |
| 3330 | let a = _mm256_set1_epi32(1); |
| 3331 | let b = _mm256_set1_epi32(1 << 31); |
| 3332 | let r = _mm256_mask_shldi_epi32::<2>(a, 0, a, b); |
| 3333 | assert_eq_m256i(r, a); |
| 3334 | let r = _mm256_mask_shldi_epi32::<2>(a, 0b11111111, a, b); |
| 3335 | let e = _mm256_set1_epi32(6); |
| 3336 | assert_eq_m256i(r, e); |
| 3337 | } |
| 3338 | |
| 3339 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3340 | unsafe fn test_mm256_maskz_shldi_epi32() { |
| 3341 | let a = _mm256_set1_epi32(1); |
| 3342 | let b = _mm256_set1_epi32(1 << 31); |
| 3343 | let r = _mm256_maskz_shldi_epi32::<2>(0, a, b); |
| 3344 | assert_eq_m256i(r, _mm256_setzero_si256()); |
| 3345 | let r = _mm256_maskz_shldi_epi32::<2>(0b11111111, a, b); |
| 3346 | let e = _mm256_set1_epi32(6); |
| 3347 | assert_eq_m256i(r, e); |
| 3348 | } |
| 3349 | |
| 3350 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3351 | unsafe fn test_mm_shldi_epi32() { |
| 3352 | let a = _mm_set1_epi32(1); |
| 3353 | let b = _mm_set1_epi32(1 << 31); |
| 3354 | let r = _mm_shldi_epi32::<2>(a, b); |
| 3355 | let e = _mm_set1_epi32(6); |
| 3356 | assert_eq_m128i(r, e); |
| 3357 | } |
| 3358 | |
| 3359 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3360 | unsafe fn test_mm_mask_shldi_epi32() { |
| 3361 | let a = _mm_set1_epi32(1); |
| 3362 | let b = _mm_set1_epi32(1 << 31); |
| 3363 | let r = _mm_mask_shldi_epi32::<2>(a, 0, a, b); |
| 3364 | assert_eq_m128i(r, a); |
| 3365 | let r = _mm_mask_shldi_epi32::<2>(a, 0b00001111, a, b); |
| 3366 | let e = _mm_set1_epi32(6); |
| 3367 | assert_eq_m128i(r, e); |
| 3368 | } |
| 3369 | |
| 3370 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3371 | unsafe fn test_mm_maskz_shldi_epi32() { |
| 3372 | let a = _mm_set1_epi32(1); |
| 3373 | let b = _mm_set1_epi32(1 << 31); |
| 3374 | let r = _mm_maskz_shldi_epi32::<2>(0, a, b); |
| 3375 | assert_eq_m128i(r, _mm_setzero_si128()); |
| 3376 | let r = _mm_maskz_shldi_epi32::<2>(0b00001111, a, b); |
| 3377 | let e = _mm_set1_epi32(6); |
| 3378 | assert_eq_m128i(r, e); |
| 3379 | } |
| 3380 | |
| 3381 | #[simd_test(enable = "avx512vbmi2" )] |
| 3382 | unsafe fn test_mm512_shldi_epi16() { |
| 3383 | let a = _mm512_set1_epi16(1); |
| 3384 | let b = _mm512_set1_epi16(1 << 15); |
| 3385 | let r = _mm512_shldi_epi16::<2>(a, b); |
| 3386 | let e = _mm512_set1_epi16(6); |
| 3387 | assert_eq_m512i(r, e); |
| 3388 | } |
| 3389 | |
| 3390 | #[simd_test(enable = "avx512vbmi2" )] |
| 3391 | unsafe fn test_mm512_mask_shldi_epi16() { |
| 3392 | let a = _mm512_set1_epi16(1); |
| 3393 | let b = _mm512_set1_epi16(1 << 15); |
| 3394 | let r = _mm512_mask_shldi_epi16::<2>(a, 0, a, b); |
| 3395 | assert_eq_m512i(r, a); |
| 3396 | let r = _mm512_mask_shldi_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a, b); |
| 3397 | let e = _mm512_set1_epi16(6); |
| 3398 | assert_eq_m512i(r, e); |
| 3399 | } |
| 3400 | |
| 3401 | #[simd_test(enable = "avx512vbmi2" )] |
| 3402 | unsafe fn test_mm512_maskz_shldi_epi16() { |
| 3403 | let a = _mm512_set1_epi16(1); |
| 3404 | let b = _mm512_set1_epi16(1 << 15); |
| 3405 | let r = _mm512_maskz_shldi_epi16::<2>(0, a, b); |
| 3406 | assert_eq_m512i(r, _mm512_setzero_si512()); |
| 3407 | let r = _mm512_maskz_shldi_epi16::<2>(0b11111111_11111111_11111111_11111111, a, b); |
| 3408 | let e = _mm512_set1_epi16(6); |
| 3409 | assert_eq_m512i(r, e); |
| 3410 | } |
| 3411 | |
| 3412 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3413 | unsafe fn test_mm256_shldi_epi16() { |
| 3414 | let a = _mm256_set1_epi16(1); |
| 3415 | let b = _mm256_set1_epi16(1 << 15); |
| 3416 | let r = _mm256_shldi_epi16::<2>(a, b); |
| 3417 | let e = _mm256_set1_epi16(6); |
| 3418 | assert_eq_m256i(r, e); |
| 3419 | } |
| 3420 | |
| 3421 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3422 | unsafe fn test_mm256_mask_shldi_epi16() { |
| 3423 | let a = _mm256_set1_epi16(1); |
| 3424 | let b = _mm256_set1_epi16(1 << 15); |
| 3425 | let r = _mm256_mask_shldi_epi16::<2>(a, 0, a, b); |
| 3426 | assert_eq_m256i(r, a); |
| 3427 | let r = _mm256_mask_shldi_epi16::<2>(a, 0b11111111_11111111, a, b); |
| 3428 | let e = _mm256_set1_epi16(6); |
| 3429 | assert_eq_m256i(r, e); |
| 3430 | } |
| 3431 | |
| 3432 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3433 | unsafe fn test_mm256_maskz_shldi_epi16() { |
| 3434 | let a = _mm256_set1_epi16(1); |
| 3435 | let b = _mm256_set1_epi16(1 << 15); |
| 3436 | let r = _mm256_maskz_shldi_epi16::<2>(0, a, b); |
| 3437 | assert_eq_m256i(r, _mm256_setzero_si256()); |
| 3438 | let r = _mm256_maskz_shldi_epi16::<2>(0b11111111_11111111, a, b); |
| 3439 | let e = _mm256_set1_epi16(6); |
| 3440 | assert_eq_m256i(r, e); |
| 3441 | } |
| 3442 | |
| 3443 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3444 | unsafe fn test_mm_shldi_epi16() { |
| 3445 | let a = _mm_set1_epi16(1); |
| 3446 | let b = _mm_set1_epi16(1 << 15); |
| 3447 | let r = _mm_shldi_epi16::<2>(a, b); |
| 3448 | let e = _mm_set1_epi16(6); |
| 3449 | assert_eq_m128i(r, e); |
| 3450 | } |
| 3451 | |
| 3452 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3453 | unsafe fn test_mm_mask_shldi_epi16() { |
| 3454 | let a = _mm_set1_epi16(1); |
| 3455 | let b = _mm_set1_epi16(1 << 15); |
| 3456 | let r = _mm_mask_shldi_epi16::<2>(a, 0, a, b); |
| 3457 | assert_eq_m128i(r, a); |
| 3458 | let r = _mm_mask_shldi_epi16::<2>(a, 0b11111111, a, b); |
| 3459 | let e = _mm_set1_epi16(6); |
| 3460 | assert_eq_m128i(r, e); |
| 3461 | } |
| 3462 | |
| 3463 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3464 | unsafe fn test_mm_maskz_shldi_epi16() { |
| 3465 | let a = _mm_set1_epi16(1); |
| 3466 | let b = _mm_set1_epi16(1 << 15); |
| 3467 | let r = _mm_maskz_shldi_epi16::<2>(0, a, b); |
| 3468 | assert_eq_m128i(r, _mm_setzero_si128()); |
| 3469 | let r = _mm_maskz_shldi_epi16::<2>(0b11111111, a, b); |
| 3470 | let e = _mm_set1_epi16(6); |
| 3471 | assert_eq_m128i(r, e); |
| 3472 | } |
| 3473 | |
| 3474 | #[simd_test(enable = "avx512vbmi2" )] |
| 3475 | unsafe fn test_mm512_shrdi_epi64() { |
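// shrdi is the immediate-count form of shrdv: (b:a) >> IMM8, keeping the low 64 bits,
// so (8:2) >> 1 again produces 1 in every lane.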
| 3476 | let a = _mm512_set1_epi64(2); |
| 3477 | let b = _mm512_set1_epi64(8); |
| 3478 | let r = _mm512_shrdi_epi64::<1>(a, b); |
| 3479 | let e = _mm512_set1_epi64(1); |
| 3480 | assert_eq_m512i(r, e); |
| 3481 | } |
| 3482 | |
| 3483 | #[simd_test(enable = "avx512vbmi2" )] |
| 3484 | unsafe fn test_mm512_mask_shrdi_epi64() { |
| 3485 | let a = _mm512_set1_epi64(2); |
| 3486 | let b = _mm512_set1_epi64(8); |
| 3487 | let r = _mm512_mask_shrdi_epi64::<1>(a, 0, a, b); |
| 3488 | assert_eq_m512i(r, a); |
| 3489 | let r = _mm512_mask_shrdi_epi64::<1>(a, 0b11111111, a, b); |
| 3490 | let e = _mm512_set1_epi64(1); |
| 3491 | assert_eq_m512i(r, e); |
| 3492 | } |
| 3493 | |
| 3494 | #[simd_test(enable = "avx512vbmi2" )] |
| 3495 | unsafe fn test_mm512_maskz_shrdi_epi64() { |
| 3496 | let a = _mm512_set1_epi64(2); |
| 3497 | let b = _mm512_set1_epi64(8); |
| 3498 | let r = _mm512_maskz_shrdi_epi64::<1>(0, a, b); |
| 3499 | assert_eq_m512i(r, _mm512_setzero_si512()); |
| 3500 | let r = _mm512_maskz_shrdi_epi64::<1>(0b11111111, a, b); |
| 3501 | let e = _mm512_set1_epi64(1); |
| 3502 | assert_eq_m512i(r, e); |
| 3503 | } |
| 3504 | |
| 3505 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3506 | unsafe fn test_mm256_shrdi_epi64() { |
| 3507 | let a = _mm256_set1_epi64x(2); |
| 3508 | let b = _mm256_set1_epi64x(8); |
| 3509 | let r = _mm256_shrdi_epi64::<1>(a, b); |
| 3510 | let e = _mm256_set1_epi64x(1); |
| 3511 | assert_eq_m256i(r, e); |
| 3512 | } |
| 3513 | |
| 3514 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3515 | unsafe fn test_mm256_mask_shrdi_epi64() { |
| 3516 | let a = _mm256_set1_epi64x(2); |
| 3517 | let b = _mm256_set1_epi64x(8); |
| 3518 | let r = _mm256_mask_shrdi_epi64::<1>(a, 0, a, b); |
| 3519 | assert_eq_m256i(r, a); |
| 3520 | let r = _mm256_mask_shrdi_epi64::<1>(a, 0b00001111, a, b); |
| 3521 | let e = _mm256_set1_epi64x(1); |
| 3522 | assert_eq_m256i(r, e); |
| 3523 | } |
| 3524 | |
| 3525 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3526 | unsafe fn test_mm256_maskz_shrdi_epi64() { |
| 3527 | let a = _mm256_set1_epi64x(2); |
| 3528 | let b = _mm256_set1_epi64x(8); |
| 3529 | let r = _mm256_maskz_shrdi_epi64::<1>(0, a, b); |
| 3530 | assert_eq_m256i(r, _mm256_setzero_si256()); |
| 3531 | let r = _mm256_maskz_shrdi_epi64::<1>(0b00001111, a, b); |
| 3532 | let e = _mm256_set1_epi64x(1); |
| 3533 | assert_eq_m256i(r, e); |
| 3534 | } |
| 3535 | |
| 3536 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3537 | unsafe fn test_mm_shrdi_epi64() { |
| 3538 | let a = _mm_set1_epi64x(2); |
| 3539 | let b = _mm_set1_epi64x(8); |
| 3540 | let r = _mm_shrdi_epi64::<1>(a, b); |
| 3541 | let e = _mm_set1_epi64x(1); |
| 3542 | assert_eq_m128i(r, e); |
| 3543 | } |
| 3544 | |
| 3545 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3546 | unsafe fn test_mm_mask_shrdi_epi64() { |
| 3547 | let a = _mm_set1_epi64x(2); |
| 3548 | let b = _mm_set1_epi64x(8); |
| 3549 | let r = _mm_mask_shrdi_epi64::<1>(a, 0, a, b); |
| 3550 | assert_eq_m128i(r, a); |
| 3551 | let r = _mm_mask_shrdi_epi64::<1>(a, 0b00000011, a, b); |
| 3552 | let e = _mm_set1_epi64x(1); |
| 3553 | assert_eq_m128i(r, e); |
| 3554 | } |
| 3555 | |
| 3556 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3557 | unsafe fn test_mm_maskz_shrdi_epi64() { |
| 3558 | let a = _mm_set1_epi64x(2); |
| 3559 | let b = _mm_set1_epi64x(8); |
| 3560 | let r = _mm_maskz_shrdi_epi64::<1>(0, a, b); |
| 3561 | assert_eq_m128i(r, _mm_setzero_si128()); |
| 3562 | let r = _mm_maskz_shrdi_epi64::<1>(0b00000011, a, b); |
| 3563 | let e = _mm_set1_epi64x(1); |
| 3564 | assert_eq_m128i(r, e); |
| 3565 | } |
| 3566 | |
| 3567 | #[simd_test(enable = "avx512vbmi2" )] |
| 3568 | unsafe fn test_mm512_shrdi_epi32() { |
| 3569 | let a = _mm512_set1_epi32(2); |
| 3570 | let b = _mm512_set1_epi32(8); |
| 3571 | let r = _mm512_shrdi_epi32::<1>(a, b); |
| 3572 | let e = _mm512_set1_epi32(1); |
| 3573 | assert_eq_m512i(r, e); |
| 3574 | } |
| 3575 | |
| 3576 | #[simd_test(enable = "avx512vbmi2" )] |
| 3577 | unsafe fn test_mm512_mask_shrdi_epi32() { |
| 3578 | let a = _mm512_set1_epi32(2); |
| 3579 | let b = _mm512_set1_epi32(8); |
| 3580 | let r = _mm512_mask_shrdi_epi32::<1>(a, 0, a, b); |
| 3581 | assert_eq_m512i(r, a); |
| 3582 | let r = _mm512_mask_shrdi_epi32::<1>(a, 0b11111111_11111111, a, b); |
| 3583 | let e = _mm512_set1_epi32(1); |
| 3584 | assert_eq_m512i(r, e); |
| 3585 | } |
| 3586 | |
| 3587 | #[simd_test(enable = "avx512vbmi2" )] |
| 3588 | unsafe fn test_mm512_maskz_shrdi_epi32() { |
| 3589 | let a = _mm512_set1_epi32(2); |
| 3590 | let b = _mm512_set1_epi32(8); |
| 3591 | let r = _mm512_maskz_shrdi_epi32::<1>(0, a, b); |
| 3592 | assert_eq_m512i(r, _mm512_setzero_si512()); |
| 3593 | let r = _mm512_maskz_shrdi_epi32::<1>(0b11111111_11111111, a, b); |
| 3594 | let e = _mm512_set1_epi32(1); |
| 3595 | assert_eq_m512i(r, e); |
| 3596 | } |
| 3597 | |
| 3598 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3599 | unsafe fn test_mm256_shrdi_epi32() { |
| 3600 | let a = _mm256_set1_epi32(2); |
| 3601 | let b = _mm256_set1_epi32(8); |
| 3602 | let r = _mm256_shrdi_epi32::<1>(a, b); |
| 3603 | let e = _mm256_set1_epi32(1); |
| 3604 | assert_eq_m256i(r, e); |
| 3605 | } |
| 3606 | |
| 3607 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3608 | unsafe fn test_mm256_mask_shrdi_epi32() { |
| 3609 | let a = _mm256_set1_epi32(2); |
| 3610 | let b = _mm256_set1_epi32(8); |
| 3611 | let r = _mm256_mask_shrdi_epi32::<1>(a, 0, a, b); |
| 3612 | assert_eq_m256i(r, a); |
| 3613 | let r = _mm256_mask_shrdi_epi32::<1>(a, 0b11111111, a, b); |
| 3614 | let e = _mm256_set1_epi32(1); |
| 3615 | assert_eq_m256i(r, e); |
| 3616 | } |
| 3617 | |
| 3618 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3619 | unsafe fn test_mm256_maskz_shrdi_epi32() { |
| 3620 | let a = _mm256_set1_epi32(2); |
| 3621 | let b = _mm256_set1_epi32(8); |
| 3622 | let r = _mm256_maskz_shrdi_epi32::<1>(0, a, b); |
| 3623 | assert_eq_m256i(r, _mm256_setzero_si256()); |
| 3624 | let r = _mm256_maskz_shrdi_epi32::<1>(0b11111111, a, b); |
| 3625 | let e = _mm256_set1_epi32(1); |
| 3626 | assert_eq_m256i(r, e); |
| 3627 | } |
| 3628 | |
| 3629 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3630 | unsafe fn test_mm_shrdi_epi32() { |
| 3631 | let a = _mm_set1_epi32(2); |
| 3632 | let b = _mm_set1_epi32(8); |
| 3633 | let r = _mm_shrdi_epi32::<1>(a, b); |
| 3634 | let e = _mm_set1_epi32(1); |
| 3635 | assert_eq_m128i(r, e); |
| 3636 | } |
| 3637 | |
| 3638 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3639 | unsafe fn test_mm_mask_shrdi_epi32() { |
| 3640 | let a = _mm_set1_epi32(2); |
| 3641 | let b = _mm_set1_epi32(8); |
| 3642 | let r = _mm_mask_shrdi_epi32::<1>(a, 0, a, b); |
| 3643 | assert_eq_m128i(r, a); |
| 3644 | let r = _mm_mask_shrdi_epi32::<1>(a, 0b00001111, a, b); |
| 3645 | let e = _mm_set1_epi32(1); |
| 3646 | assert_eq_m128i(r, e); |
| 3647 | } |
| 3648 | |
| 3649 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3650 | unsafe fn test_mm_maskz_shrdi_epi32() { |
| 3651 | let a = _mm_set1_epi32(2); |
| 3652 | let b = _mm_set1_epi32(8); |
| 3653 | let r = _mm_maskz_shrdi_epi32::<1>(0, a, b); |
| 3654 | assert_eq_m128i(r, _mm_setzero_si128()); |
| 3655 | let r = _mm_maskz_shrdi_epi32::<1>(0b00001111, a, b); |
| 3656 | let e = _mm_set1_epi32(1); |
| 3657 | assert_eq_m128i(r, e); |
| 3658 | } |
| 3659 | |
| 3660 | #[simd_test(enable = "avx512vbmi2" )] |
| 3661 | unsafe fn test_mm512_shrdi_epi16() { |
| 3662 | let a = _mm512_set1_epi16(2); |
| 3663 | let b = _mm512_set1_epi16(8); |
| 3664 | let r = _mm512_shrdi_epi16::<1>(a, b); |
| 3665 | let e = _mm512_set1_epi16(1); |
| 3666 | assert_eq_m512i(r, e); |
| 3667 | } |
| 3668 | |
| 3669 | #[simd_test(enable = "avx512vbmi2" )] |
| 3670 | unsafe fn test_mm512_mask_shrdi_epi16() { |
| 3671 | let a = _mm512_set1_epi16(2); |
| 3672 | let b = _mm512_set1_epi16(8); |
| 3673 | let r = _mm512_mask_shrdi_epi16::<1>(a, 0, a, b); |
| 3674 | assert_eq_m512i(r, a); |
| 3675 | let r = _mm512_mask_shrdi_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a, b); |
| 3676 | let e = _mm512_set1_epi16(1); |
| 3677 | assert_eq_m512i(r, e); |
| 3678 | } |
| 3679 | |
| 3680 | #[simd_test(enable = "avx512vbmi2" )] |
| 3681 | unsafe fn test_mm512_maskz_shrdi_epi16() { |
| 3682 | let a = _mm512_set1_epi16(2); |
| 3683 | let b = _mm512_set1_epi16(8); |
| 3684 | let r = _mm512_maskz_shrdi_epi16::<1>(0, a, b); |
| 3685 | assert_eq_m512i(r, _mm512_setzero_si512()); |
| 3686 | let r = _mm512_maskz_shrdi_epi16::<1>(0b11111111_11111111_11111111_11111111, a, b); |
| 3687 | let e = _mm512_set1_epi16(1); |
| 3688 | assert_eq_m512i(r, e); |
| 3689 | } |
| 3690 | |
| 3691 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3692 | unsafe fn test_mm256_shrdi_epi16() { |
| 3693 | let a = _mm256_set1_epi16(2); |
| 3694 | let b = _mm256_set1_epi16(8); |
| 3695 | let r = _mm256_shrdi_epi16::<1>(a, b); |
| 3696 | let e = _mm256_set1_epi16(1); |
| 3697 | assert_eq_m256i(r, e); |
| 3698 | } |
| 3699 | |
| 3700 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3701 | unsafe fn test_mm256_mask_shrdi_epi16() { |
| 3702 | let a = _mm256_set1_epi16(2); |
| 3703 | let b = _mm256_set1_epi16(8); |
| 3704 | let r = _mm256_mask_shrdi_epi16::<1>(a, 0, a, b); |
| 3705 | assert_eq_m256i(r, a); |
| 3706 | let r = _mm256_mask_shrdi_epi16::<1>(a, 0b11111111_11111111, a, b); |
| 3707 | let e = _mm256_set1_epi16(1); |
| 3708 | assert_eq_m256i(r, e); |
| 3709 | } |
| 3710 | |
| 3711 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3712 | unsafe fn test_mm256_maskz_shrdi_epi16() { |
| 3713 | let a = _mm256_set1_epi16(2); |
| 3714 | let b = _mm256_set1_epi16(8); |
| 3715 | let r = _mm256_maskz_shrdi_epi16::<1>(0, a, b); |
| 3716 | assert_eq_m256i(r, _mm256_setzero_si256()); |
| 3717 | let r = _mm256_maskz_shrdi_epi16::<1>(0b11111111_11111111, a, b); |
| 3718 | let e = _mm256_set1_epi16(1); |
| 3719 | assert_eq_m256i(r, e); |
| 3720 | } |
| 3721 | |
| 3722 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3723 | unsafe fn test_mm_shrdi_epi16() { |
| 3724 | let a = _mm_set1_epi16(2); |
| 3725 | let b = _mm_set1_epi16(8); |
| 3726 | let r = _mm_shrdi_epi16::<1>(a, b); |
| 3727 | let e = _mm_set1_epi16(1); |
| 3728 | assert_eq_m128i(r, e); |
| 3729 | } |
| 3730 | |
| 3731 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3732 | unsafe fn test_mm_mask_shrdi_epi16() { |
| 3733 | let a = _mm_set1_epi16(2); |
| 3734 | let b = _mm_set1_epi16(8); |
| 3735 | let r = _mm_mask_shrdi_epi16::<1>(a, 0, a, b); |
| 3736 | assert_eq_m128i(r, a); |
| 3737 | let r = _mm_mask_shrdi_epi16::<1>(a, 0b11111111, a, b); |
| 3738 | let e = _mm_set1_epi16(1); |
| 3739 | assert_eq_m128i(r, e); |
| 3740 | } |
| 3741 | |
| 3742 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3743 | unsafe fn test_mm_maskz_shrdi_epi16() { |
| 3744 | let a = _mm_set1_epi16(2); |
| 3745 | let b = _mm_set1_epi16(8); |
| 3746 | let r = _mm_maskz_shrdi_epi16::<1>(0, a, b); |
| 3747 | assert_eq_m128i(r, _mm_setzero_si128()); |
| 3748 | let r = _mm_maskz_shrdi_epi16::<1>(0b11111111, a, b); |
| 3749 | let e = _mm_set1_epi16(1); |
| 3750 | assert_eq_m128i(r, e); |
| 3751 | } |
| 3752 | |
| 3753 | #[simd_test(enable = "avx512vbmi2" )] |
| 3754 | unsafe fn test_mm512_mask_expandloadu_epi16() { |
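// Expand-load reads consecutive i16 values from memory and places them, in order,
// into the lanes whose mask bit is set (bit 0 = lowest lane); lanes with a clear
// bit keep the corresponding element of src (42 here), or are zeroed in the maskz form.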
| 3755 | let src = _mm512_set1_epi16(42); |
| 3756 | let a = &[ |
| 3757 | 1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
| 3758 | 24, 25, 26, 27, 28, 29, 30, 31, 32, |
| 3759 | ]; |
| 3760 | let p = a.as_ptr(); |
| 3761 | let m = 0b11101000_11001010_11110000_00001111; |
| 3762 | let r = _mm512_mask_expandloadu_epi16(src, m, black_box(p)); |
| 3763 | let e = _mm512_set_epi16( |
| 3764 | 16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42, |
| 3765 | 42, 42, 42, 42, 42, 4, 3, 2, 1, |
| 3766 | ); |
| 3767 | assert_eq_m512i(r, e); |
| 3768 | } |
| 3769 | |
| 3770 | #[simd_test(enable = "avx512vbmi2" )] |
| 3771 | unsafe fn test_mm512_maskz_expandloadu_epi16() { |
| 3772 | let a = &[ |
| 3773 | 1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
| 3774 | 24, 25, 26, 27, 28, 29, 30, 31, 32, |
| 3775 | ]; |
| 3776 | let p = a.as_ptr(); |
| 3777 | let m = 0b11101000_11001010_11110000_00001111; |
| 3778 | let r = _mm512_maskz_expandloadu_epi16(m, black_box(p)); |
| 3779 | let e = _mm512_set_epi16( |
| 3780 | 16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0, |
| 3781 | 0, 4, 3, 2, 1, |
| 3782 | ); |
| 3783 | assert_eq_m512i(r, e); |
| 3784 | } |
| 3785 | |
| 3786 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3787 | unsafe fn test_mm256_mask_expandloadu_epi16() { |
| 3788 | let src = _mm256_set1_epi16(42); |
| 3789 | let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; |
| 3790 | let p = a.as_ptr(); |
| 3791 | let m = 0b11101000_11001010; |
| 3792 | let r = _mm256_mask_expandloadu_epi16(src, m, black_box(p)); |
| 3793 | let e = _mm256_set_epi16(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42); |
| 3794 | assert_eq_m256i(r, e); |
| 3795 | } |
| 3796 | |
| 3797 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3798 | unsafe fn test_mm256_maskz_expandloadu_epi16() { |
| 3799 | let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; |
| 3800 | let p = a.as_ptr(); |
| 3801 | let m = 0b11101000_11001010; |
| 3802 | let r = _mm256_maskz_expandloadu_epi16(m, black_box(p)); |
| 3803 | let e = _mm256_set_epi16(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0); |
| 3804 | assert_eq_m256i(r, e); |
| 3805 | } |
| 3806 | |
| 3807 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3808 | unsafe fn test_mm_mask_expandloadu_epi16() { |
| 3809 | let src = _mm_set1_epi16(42); |
| 3810 | let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8]; |
| 3811 | let p = a.as_ptr(); |
| 3812 | let m = 0b11101000; |
| 3813 | let r = _mm_mask_expandloadu_epi16(src, m, black_box(p)); |
| 3814 | let e = _mm_set_epi16(4, 3, 2, 42, 1, 42, 42, 42); |
| 3815 | assert_eq_m128i(r, e); |
| 3816 | } |
| 3817 | |
| 3818 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3819 | unsafe fn test_mm_maskz_expandloadu_epi16() { |
| 3820 | let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8]; |
| 3821 | let p = a.as_ptr(); |
| 3822 | let m = 0b11101000; |
| 3823 | let r = _mm_maskz_expandloadu_epi16(m, black_box(p)); |
| 3824 | let e = _mm_set_epi16(4, 3, 2, 0, 1, 0, 0, 0); |
| 3825 | assert_eq_m128i(r, e); |
| 3826 | } |
| 3827 | |
| 3828 | #[simd_test(enable = "avx512vbmi2" )] |
| 3829 | unsafe fn test_mm512_mask_expandloadu_epi8() { |
| 3830 | let src = _mm512_set1_epi8(42); |
| 3831 | let a = &[ |
| 3832 | 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
| 3833 | 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, |
| 3834 | 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, |
| 3835 | ]; |
| 3836 | let p = a.as_ptr(); |
| 3837 | let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101; |
| 3838 | let r = _mm512_mask_expandloadu_epi8(src, m, black_box(p)); |
| 3839 | let e = _mm512_set_epi8( |
| 3840 | 32, 31, 30, 42, 29, 42, 42, 42, 28, 27, 42, 42, 26, 42, 25, 42, 24, 23, 22, 21, 42, 42, |
| 3841 | 42, 42, 42, 42, 42, 42, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 42, 42, 42, 42, |
| 3842 | 42, 42, 42, 42, 8, 42, 7, 42, 6, 42, 5, 42, 42, 4, 42, 3, 42, 2, 42, 1, |
| 3843 | ); |
| 3844 | assert_eq_m512i(r, e); |
| 3845 | } |
| 3846 | |
| 3847 | #[simd_test(enable = "avx512vbmi2" )] |
| 3848 | unsafe fn test_mm512_maskz_expandloadu_epi8() { |
| 3849 | let a = &[ |
| 3850 | 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
| 3851 | 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, |
| 3852 | 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, |
| 3853 | ]; |
| 3854 | let p = a.as_ptr(); |
| 3855 | let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101; |
| 3856 | let r = _mm512_maskz_expandloadu_epi8(m, black_box(p)); |
| 3857 | let e = _mm512_set_epi8( |
| 3858 | 32, 31, 30, 0, 29, 0, 0, 0, 28, 27, 0, 0, 26, 0, 25, 0, 24, 23, 22, 21, 0, 0, 0, 0, 0, |
| 3859 | 0, 0, 0, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, |
| 3860 | 7, 0, 6, 0, 5, 0, 0, 4, 0, 3, 0, 2, 0, 1, |
| 3861 | ); |
| 3862 | assert_eq_m512i(r, e); |
| 3863 | } |
| 3864 | |
| 3865 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3866 | unsafe fn test_mm256_mask_expandloadu_epi8() { |
| 3867 | let src = _mm256_set1_epi8(42); |
| 3868 | let a = &[ |
| 3869 | 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
| 3870 | 24, 25, 26, 27, 28, 29, 30, 31, 32, |
| 3871 | ]; |
| 3872 | let p = a.as_ptr(); |
| 3873 | let m = 0b11101000_11001010_11110000_00001111; |
| 3874 | let r = _mm256_mask_expandloadu_epi8(src, m, black_box(p)); |
| 3875 | let e = _mm256_set_epi8( |
| 3876 | 16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42, |
| 3877 | 42, 42, 42, 42, 42, 4, 3, 2, 1, |
| 3878 | ); |
| 3879 | assert_eq_m256i(r, e); |
| 3880 | } |
| 3881 | |
| 3882 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3883 | unsafe fn test_mm256_maskz_expandloadu_epi8() { |
| 3884 | let a = &[ |
| 3885 | 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
| 3886 | 24, 25, 26, 27, 28, 29, 30, 31, 32, |
| 3887 | ]; |
| 3888 | let p = a.as_ptr(); |
| 3889 | let m = 0b11101000_11001010_11110000_00001111; |
| 3890 | let r = _mm256_maskz_expandloadu_epi8(m, black_box(p)); |
| 3891 | let e = _mm256_set_epi8( |
| 3892 | 16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0, |
| 3893 | 0, 4, 3, 2, 1, |
| 3894 | ); |
| 3895 | assert_eq_m256i(r, e); |
| 3896 | } |
| 3897 | |
| 3898 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3899 | unsafe fn test_mm_mask_expandloadu_epi8() { |
| 3900 | let src = _mm_set1_epi8(42); |
| 3901 | let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; |
| 3902 | let p = a.as_ptr(); |
| 3903 | let m = 0b11101000_11001010; |
| 3904 | let r = _mm_mask_expandloadu_epi8(src, m, black_box(p)); |
| 3905 | let e = _mm_set_epi8(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42); |
| 3906 | assert_eq_m128i(r, e); |
| 3907 | } |
| 3908 | |
| 3909 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3910 | unsafe fn test_mm_maskz_expandloadu_epi8() { |
| 3911 | let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; |
| 3912 | let p = a.as_ptr(); |
| 3913 | let m = 0b11101000_11001010; |
| 3914 | let r = _mm_maskz_expandloadu_epi8(m, black_box(p)); |
| 3915 | let e = _mm_set_epi8(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0); |
| 3916 | assert_eq_m128i(r, e); |
| 3917 | } |
| 3918 | |
| 3919 | #[simd_test(enable = "avx512vbmi2" )] |
| 3920 | unsafe fn test_mm512_mask_compressstoreu_epi16() { |
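// Compress-store is the inverse of expand-load: the lanes of a whose mask bit is set
// are written contiguously to memory, lowest lane first; the remainder of the output
// buffer is left untouched (still 0 here).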
| 3921 | let a = _mm512_set_epi16( |
| 3922 | 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, |
| 3923 | 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, |
| 3924 | ); |
| 3925 | let mut r = [0_i16; 32]; |
| 3926 | _mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a); |
| 3927 | assert_eq!(&r, &[0_i16; 32]); |
| 3928 | _mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000_11001010_11111111_00000000, a); |
| 3929 | assert_eq!( |
| 3930 | &r, |
| 3931 | &[ |
| 3932 | 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, |
| 3933 | 0, 0, 0, 0, 0, 0, 0, 0, 0 |
| 3934 | ] |
| 3935 | ); |
| 3936 | } |
| 3937 | |
| 3938 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3939 | unsafe fn test_mm256_mask_compressstoreu_epi16() { |
| 3940 | let a = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); |
| 3941 | let mut r = [0_i16; 16]; |
| 3942 | _mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a); |
| 3943 | assert_eq!(&r, &[0_i16; 16]); |
| 3944 | _mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000_11001010, a); |
| 3945 | assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]); |
| 3946 | } |
| 3947 | |
| 3948 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3949 | unsafe fn test_mm_mask_compressstoreu_epi16() { |
| 3950 | let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); |
| 3951 | let mut r = [0_i16; 8]; |
| 3952 | _mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a); |
| 3953 | assert_eq!(&r, &[0_i16; 8]); |
| 3954 | _mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000, a); |
| 3955 | assert_eq!(&r, &[5, 6, 7, 8, 0, 0, 0, 0]); |
| 3956 | } |
| 3957 | |
| 3958 | #[simd_test(enable = "avx512vbmi2" )] |
| 3959 | unsafe fn test_mm512_mask_compressstoreu_epi8() { |
| 3960 | let a = _mm512_set_epi8( |
| 3961 | 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, |
| 3962 | 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, |
| 3963 | 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, |
| 3964 | ); |
| 3965 | let mut r = [0_i8; 64]; |
| 3966 | _mm512_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a); |
| 3967 | assert_eq!(&r, &[0_i8; 64]); |
| 3968 | _mm512_mask_compressstoreu_epi8( |
| 3969 | r.as_mut_ptr(), |
| 3970 | 0b11110000_11001010_11111111_00000000_10101010_01010101_11110000_00001111, |
| 3971 | a, |
| 3972 | ); |
| 3973 | assert_eq!( |
| 3974 | &r, |
| 3975 | &[ |
| 3976 | 1, 2, 3, 4, 13, 14, 15, 16, 17, 19, 21, 23, 26, 28, 30, 32, 41, 42, 43, 44, 45, 46, |
| 3977 | 47, 48, 50, 52, 55, 56, 61, 62, 63, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 3978 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
| 3979 | ] |
| 3980 | ); |
| 3981 | } |
| 3982 | |
| 3983 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 3984 | unsafe fn test_mm256_mask_compressstoreu_epi8() { |
| 3985 | let a = _mm256_set_epi8( |
| 3986 | 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, |
| 3987 | 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, |
| 3988 | ); |
| 3989 | let mut r = [0_i8; 32]; |
| 3990 | _mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a); |
| 3991 | assert_eq!(&r, &[0_i8; 32]); |
| 3992 | _mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010_11111111_00000000, a); |
| 3993 | assert_eq!( |
| 3994 | &r, |
| 3995 | &[ |
| 3996 | 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, |
| 3997 | 0, 0, 0, 0, 0, 0, 0, 0, 0 |
| 3998 | ] |
| 3999 | ); |
| 4000 | } |
| 4001 | |
| 4002 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
| 4003 | unsafe fn test_mm_mask_compressstoreu_epi8() { |
| 4004 | let a = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); |
| 4005 | let mut r = [0_i8; 16]; |
| 4006 | _mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a); |
| 4007 | assert_eq!(&r, &[0_i8; 16]); |
| 4008 | _mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010, a); |
| 4009 | assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]); |
| 4010 | } |
| 4011 | } |
| 4012 | |