avx512vbmi.rs source code [crates/core_arch/src/x86/avx512vbmi.rs]

use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;

#[cfg(test)]
use stdarch_test::assert_instr;
6
7	/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
8	///
9	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi8&expand=4262)
10	#[inline]
11	#[target_feature(enable = "avx512vbmi")]
12	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13	#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
14	pub fn _mm512_permutex2var_epi8(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
15	unsafe { transmute(src:vpermi2b(a.as_i8x64(), idx.as_i8x64(), b.as_i8x64())) }
16	}
17
18	/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
19	///
20	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi8&expand=4259)
21	#[inline]
22	#[target_feature(enable = "avx512vbmi")]
23	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24	#[cfg_attr(test, assert_instr(vpermt2b))]
25	pub fn _mm512_mask_permutex2var_epi8(
26	a: __m512i,
27	k: __mmask64,
28	idx: __m512i,
29	b: __m512i,
30	) -> __m512i {
31	unsafe {
32	let permute: i8x64 = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
33	transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_i8x64()))
34	}
35	}
36
37	/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
38	///
39	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi8&expand=4261)
40	#[inline]
41	#[target_feature(enable = "avx512vbmi")]
42	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43	#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
44	pub fn _mm512_maskz_permutex2var_epi8(
45	k: __mmask64,
46	a: __m512i,
47	idx: __m512i,
48	b: __m512i,
49	) -> __m512i {
50	unsafe {
51	let permute: i8x64 = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
52	transmute(src:simd_select_bitmask(m:k, yes:permute, no:i8x64::ZERO))
53	}
54	}
55
56	/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
57	///
58	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi8&expand=4260)
59	#[inline]
60	#[target_feature(enable = "avx512vbmi")]
61	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
62	#[cfg_attr(test, assert_instr(vpermi2b))]
63	pub fn _mm512_mask2_permutex2var_epi8(
64	a: __m512i,
65	idx: __m512i,
66	k: __mmask64,
67	b: __m512i,
68	) -> __m512i {
69	unsafe {
70	let permute: i8x64 = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
71	transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx.as_i8x64()))
72	}
73	}
74
75	/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
76	///
77	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi8&expand=4258)
78	#[inline]
79	#[target_feature(enable = "avx512vbmi,avx512vl")]
80	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
81	#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
82	pub fn _mm256_permutex2var_epi8(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
83	unsafe { transmute(src:vpermi2b256(a.as_i8x32(), idx.as_i8x32(), b.as_i8x32())) }
84	}
85
86	/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
87	///
88	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi8&expand=4255)
89	#[inline]
90	#[target_feature(enable = "avx512vbmi,avx512vl")]
91	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
92	#[cfg_attr(test, assert_instr(vpermt2b))]
93	pub fn _mm256_mask_permutex2var_epi8(
94	a: __m256i,
95	k: __mmask32,
96	idx: __m256i,
97	b: __m256i,
98	) -> __m256i {
99	unsafe {
100	let permute: i8x32 = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
101	transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_i8x32()))
102	}
103	}
104
105	/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
106	///
107	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi8&expand=4257)
108	#[inline]
109	#[target_feature(enable = "avx512vbmi,avx512vl")]
110	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
111	#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
112	pub fn _mm256_maskz_permutex2var_epi8(
113	k: __mmask32,
114	a: __m256i,
115	idx: __m256i,
116	b: __m256i,
117	) -> __m256i {
118	unsafe {
119	let permute: i8x32 = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
120	transmute(src:simd_select_bitmask(m:k, yes:permute, no:i8x32::ZERO))
121	}
122	}
123
124	/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
125	///
126	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi8&expand=4256)
127	#[inline]
128	#[target_feature(enable = "avx512vbmi,avx512vl")]
129	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
130	#[cfg_attr(test, assert_instr(vpermi2b))]
131	pub fn _mm256_mask2_permutex2var_epi8(
132	a: __m256i,
133	idx: __m256i,
134	k: __mmask32,
135	b: __m256i,
136	) -> __m256i {
137	unsafe {
138	let permute: i8x32 = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
139	transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx.as_i8x32()))
140	}
141	}
142
143	/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
144	///
145	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi8&expand=4254)
146	#[inline]
147	#[target_feature(enable = "avx512vbmi,avx512vl")]
148	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
149	#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
150	pub fn _mm_permutex2var_epi8(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
151	unsafe { transmute(src:vpermi2b128(a.as_i8x16(), idx.as_i8x16(), b.as_i8x16())) }
152	}
153
154	/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
155	///
156	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi8&expand=4251)
157	#[inline]
158	#[target_feature(enable = "avx512vbmi,avx512vl")]
159	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
160	#[cfg_attr(test, assert_instr(vpermt2b))]
161	pub fn _mm_mask_permutex2var_epi8(a: __m128i, k: __mmask16, idx: __m128i, b: __m128i) -> __m128i {
162	unsafe {
163	let permute: i8x16 = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
164	transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_i8x16()))
165	}
166	}
167
168	/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
169	///
170	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi8&expand=4253)
171	#[inline]
172	#[target_feature(enable = "avx512vbmi,avx512vl")]
173	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
174	#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
175	pub fn _mm_maskz_permutex2var_epi8(k: __mmask16, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
176	unsafe {
177	let permute: i8x16 = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
178	transmute(src:simd_select_bitmask(m:k, yes:permute, no:i8x16::ZERO))
179	}
180	}
181
182	/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
183	///
184	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi8&expand=4252)
185	#[inline]
186	#[target_feature(enable = "avx512vbmi,avx512vl")]
187	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
188	#[cfg_attr(test, assert_instr(vpermi2b))]
189	pub fn _mm_mask2_permutex2var_epi8(a: __m128i, idx: __m128i, k: __mmask16, b: __m128i) -> __m128i {
190	unsafe {
191	let permute: i8x16 = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
192	transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx.as_i8x16()))
193	}
194	}
195
196	/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
197	///
198	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi8&expand=4316)
199	#[inline]
200	#[target_feature(enable = "avx512vbmi")]
201	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
202	#[cfg_attr(test, assert_instr(vpermb))]
203	pub fn _mm512_permutexvar_epi8(idx: __m512i, a: __m512i) -> __m512i {
204	unsafe { transmute(src:vpermb(a.as_i8x64(), idx.as_i8x64())) }
205	}
206
207	/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
208	///
209	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi8&expand=4314)
210	#[inline]
211	#[target_feature(enable = "avx512vbmi")]
212	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
213	#[cfg_attr(test, assert_instr(vpermb))]
214	pub fn _mm512_mask_permutexvar_epi8(
215	src: __m512i,
216	k: __mmask64,
217	idx: __m512i,
218	a: __m512i,
219	) -> __m512i {
220	unsafe {
221	let permute: i8x64 = _mm512_permutexvar_epi8(idx, a).as_i8x64();
222	transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_i8x64()))
223	}
224	}
225
226	/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
227	///
228	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi8&expand=4315)
229	#[inline]
230	#[target_feature(enable = "avx512vbmi")]
231	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
232	#[cfg_attr(test, assert_instr(vpermb))]
233	pub fn _mm512_maskz_permutexvar_epi8(k: __mmask64, idx: __m512i, a: __m512i) -> __m512i {
234	unsafe {
235	let permute: i8x64 = _mm512_permutexvar_epi8(idx, a).as_i8x64();
236	transmute(src:simd_select_bitmask(m:k, yes:permute, no:i8x64::ZERO))
237	}
238	}
239
240	/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
241	///
242	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi8&expand=4313)
243	#[inline]
244	#[target_feature(enable = "avx512vbmi,avx512vl")]
245	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
246	#[cfg_attr(test, assert_instr(vpermb))]
247	pub fn _mm256_permutexvar_epi8(idx: __m256i, a: __m256i) -> __m256i {
248	unsafe { transmute(src:vpermb256(a.as_i8x32(), idx.as_i8x32())) }
249	}
250
251	/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
252	///
253	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi8&expand=4311)
254	#[inline]
255	#[target_feature(enable = "avx512vbmi,avx512vl")]
256	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
257	#[cfg_attr(test, assert_instr(vpermb))]
258	pub fn _mm256_mask_permutexvar_epi8(
259	src: __m256i,
260	k: __mmask32,
261	idx: __m256i,
262	a: __m256i,
263	) -> __m256i {
264	unsafe {
265	let permute: i8x32 = _mm256_permutexvar_epi8(idx, a).as_i8x32();
266	transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_i8x32()))
267	}
268	}
269
270	/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
271	///
272	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi8&expand=4312)
273	#[inline]
274	#[target_feature(enable = "avx512vbmi,avx512vl")]
275	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
276	#[cfg_attr(test, assert_instr(vpermb))]
277	pub fn _mm256_maskz_permutexvar_epi8(k: __mmask32, idx: __m256i, a: __m256i) -> __m256i {
278	unsafe {
279	let permute: i8x32 = _mm256_permutexvar_epi8(idx, a).as_i8x32();
280	transmute(src:simd_select_bitmask(m:k, yes:permute, no:i8x32::ZERO))
281	}
282	}
283
284	/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
285	///
286	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi8&expand=4310)
287	#[inline]
288	#[target_feature(enable = "avx512vbmi,avx512vl")]
289	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
290	#[cfg_attr(test, assert_instr(vpermb))]
291	pub fn _mm_permutexvar_epi8(idx: __m128i, a: __m128i) -> __m128i {
292	unsafe { transmute(src:vpermb128(a.as_i8x16(), idx.as_i8x16())) }
293	}
294
295	/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
296	///
297	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi8&expand=4308)
298	#[inline]
299	#[target_feature(enable = "avx512vbmi,avx512vl")]
300	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
301	#[cfg_attr(test, assert_instr(vpermb))]
302	pub fn _mm_mask_permutexvar_epi8(src: __m128i, k: __mmask16, idx: __m128i, a: __m128i) -> __m128i {
303	unsafe {
304	let permute: i8x16 = _mm_permutexvar_epi8(idx, a).as_i8x16();
305	transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_i8x16()))
306	}
307	}
308
309	/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
310	///
311	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi8&expand=4309)
312	#[inline]
313	#[target_feature(enable = "avx512vbmi,avx512vl")]
314	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
315	#[cfg_attr(test, assert_instr(vpermb))]
316	pub fn _mm_maskz_permutexvar_epi8(k: __mmask16, idx: __m128i, a: __m128i) -> __m128i {
317	unsafe {
318	let permute: i8x16 = _mm_permutexvar_epi8(idx, a).as_i8x16();
319	transmute(src:simd_select_bitmask(m:k, yes:permute, no:i8x16::ZERO))
320	}
321	}
322
323	/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
324	///
325	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_multishift_epi64_epi8&expand=4026)
326	#[inline]
327	#[target_feature(enable = "avx512vbmi")]
328	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
329	#[cfg_attr(test, assert_instr(vpmultishiftqb))]
330	pub fn _mm512_multishift_epi64_epi8(a: __m512i, b: __m512i) -> __m512i {
331	unsafe { transmute(src:vpmultishiftqb(a.as_i8x64(), b.as_i8x64())) }
332	}
333
334	/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
335	///
336	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_multishift_epi64_epi8&expand=4024)
337	#[inline]
338	#[target_feature(enable = "avx512vbmi")]
339	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
340	#[cfg_attr(test, assert_instr(vpmultishiftqb))]
341	pub fn _mm512_mask_multishift_epi64_epi8(
342	src: __m512i,
343	k: __mmask64,
344	a: __m512i,
345	b: __m512i,
346	) -> __m512i {
347	unsafe {
348	let multishift: i8x64 = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
349	transmute(src:simd_select_bitmask(m:k, yes:multishift, no:src.as_i8x64()))
350	}
351	}
352
353	/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
354	///
355	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_multishift_epi64_epi8&expand=4025)
356	#[inline]
357	#[target_feature(enable = "avx512vbmi")]
358	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
359	#[cfg_attr(test, assert_instr(vpmultishiftqb))]
360	pub fn _mm512_maskz_multishift_epi64_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
361	unsafe {
362	let multishift: i8x64 = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
363	transmute(src:simd_select_bitmask(m:k, yes:multishift, no:i8x64::ZERO))
364	}
365	}
366
367	/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
368	///
369	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_multishift_epi64_epi8&expand=4023)
370	#[inline]
371	#[target_feature(enable = "avx512vbmi,avx512vl")]
372	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
373	#[cfg_attr(test, assert_instr(vpmultishiftqb))]
374	pub fn _mm256_multishift_epi64_epi8(a: __m256i, b: __m256i) -> __m256i {
375	unsafe { transmute(src:vpmultishiftqb256(a.as_i8x32(), b.as_i8x32())) }
376	}
377
378	/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
379	///
380	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_multishift_epi64_epi8&expand=4021)
381	#[inline]
382	#[target_feature(enable = "avx512vbmi,avx512vl")]
383	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
384	#[cfg_attr(test, assert_instr(vpmultishiftqb))]
385	pub fn _mm256_mask_multishift_epi64_epi8(
386	src: __m256i,
387	k: __mmask32,
388	a: __m256i,
389	b: __m256i,
390	) -> __m256i {
391	unsafe {
392	let multishift: i8x32 = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
393	transmute(src:simd_select_bitmask(m:k, yes:multishift, no:src.as_i8x32()))
394	}
395	}
396
397	/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
398	///
399	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_multishift_epi64_epi8&expand=4022)
400	#[inline]
401	#[target_feature(enable = "avx512vbmi,avx512vl")]
402	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
403	#[cfg_attr(test, assert_instr(vpmultishiftqb))]
404	pub fn _mm256_maskz_multishift_epi64_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
405	unsafe {
406	let multishift: i8x32 = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
407	transmute(src:simd_select_bitmask(m:k, yes:multishift, no:i8x32::ZERO))
408	}
409	}
410
411	/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
412	///
413	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm_multishift_epi64_epi8&expand=4020)
414	#[inline]
415	#[target_feature(enable = "avx512vbmi,avx512vl")]
416	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
417	#[cfg_attr(test, assert_instr(vpmultishiftqb))]
418	pub fn _mm_multishift_epi64_epi8(a: __m128i, b: __m128i) -> __m128i {
419	unsafe { transmute(src:vpmultishiftqb128(a.as_i8x16(), b.as_i8x16())) }
420	}
421
422	/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
423	///
424	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_multishift_epi64_epi8&expand=4018)
425	#[inline]
426	#[target_feature(enable = "avx512vbmi,avx512vl")]
427	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
428	#[cfg_attr(test, assert_instr(vpmultishiftqb))]
429	pub fn _mm_mask_multishift_epi64_epi8(
430	src: __m128i,
431	k: __mmask16,
432	a: __m128i,
433	b: __m128i,
434	) -> __m128i {
435	unsafe {
436	let multishift: i8x16 = _mm_multishift_epi64_epi8(a, b).as_i8x16();
437	transmute(src:simd_select_bitmask(m:k, yes:multishift, no:src.as_i8x16()))
438	}
439	}
440
441	/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
442	///
443	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_multishift_epi64_epi8&expand=4019)
444	#[inline]
445	#[target_feature(enable = "avx512vbmi,avx512vl")]
446	#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
447	#[cfg_attr(test, assert_instr(vpmultishiftqb))]
448	pub fn _mm_maskz_multishift_epi64_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
449	unsafe {
450	let multishift: i8x16 = _mm_multishift_epi64_epi8(a, b).as_i8x16();
451	transmute(src:simd_select_bitmask(m:k, yes:multishift, no:i8x16::ZERO))
452	}
453	}
454
455	#[allow(improper_ctypes)]
456	unsafe extern "C" {
457	#[link_name = "llvm.x86.avx512.vpermi2var.qi.512"]
458	unsafefn vpermi2b(a: i8x64, idx: i8x64, b: i8x64) -> i8x64;
459	#[link_name = "llvm.x86.avx512.vpermi2var.qi.256"]
460	unsafefn vpermi2b256(a: i8x32, idx: i8x32, b: i8x32) -> i8x32;
461	#[link_name = "llvm.x86.avx512.vpermi2var.qi.128"]
462	unsafefn vpermi2b128(a: i8x16, idx: i8x16, b: i8x16) -> i8x16;
463
464	#[link_name = "llvm.x86.avx512.permvar.qi.512"]
465	unsafefn vpermb(a: i8x64, idx: i8x64) -> i8x64;
466	#[link_name = "llvm.x86.avx512.permvar.qi.256"]
467	unsafefn vpermb256(a: i8x32, idx: i8x32) -> i8x32;
468	#[link_name = "llvm.x86.avx512.permvar.qi.128"]
469	unsafefn vpermb128(a: i8x16, idx: i8x16) -> i8x16;
470
471	#[link_name = "llvm.x86.avx512.pmultishift.qb.512"]
472	unsafefn vpmultishiftqb(a: i8x64, b: i8x64) -> i8x64;
473	#[link_name = "llvm.x86.avx512.pmultishift.qb.256"]
474	unsafefn vpmultishiftqb256(a: i8x32, b: i8x32) -> i8x32;
475	#[link_name = "llvm.x86.avx512.pmultishift.qb.128"]
476	unsafefn vpmultishiftqb128(a: i8x16, b: i8x16) -> i8x16;
477	}
478
479	#[cfg(test)]
480	mod tests {
481
482	use stdarch_test::simd_test;
483
484	use crate::core_arch::x86::*;
485
486	#[simd_test(enable = "avx512vbmi")]
487	unsafe fn test_mm512_permutex2var_epi8() {
488	#[rustfmt::skip]
489	let a = _mm512_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
490	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
491	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`, `42`, `43`, `44`, `45`, `46`, `47`,
492	`48`, `49`, `50`, `51`, `52`, `53`, `54`, `55`, `56`, `57`, `58`, `59`, `60`, `61`, `62`, `63`);
493	#[rustfmt::skip]
494	let idx = _mm512_set_epi8(`1`, `1`<<`6`, `2`, `1`<<`6`, `3`, `1`<<`6`, `4`, `1`<<`6`, `5`, `1`<<`6`, `6`, `1`<<`6`, `7`, `1`<<`6`, `8`, `1`<<`6`,
495	`9`, `1`<<`6`, `10`, `1`<<`6`, `11`, `1`<<`6`, `12`, `1`<<`6`, `13`, `1`<<`6`, `14`, `1`<<`6`, `15`, `1`<<`6`, `16`, `1`<<`6`,
496	`17`, `1`<<`6`, `18`, `1`<<`6`, `19`, `1`<<`6`, `20`, `1`<<`6`, `21`, `1`<<`6`, `22`, `1`<<`6`, `23`, `1`<<`6`, `24`, `1`<<`6`,
497	`25`, `1`<<`6`, `26`, `1`<<`6`, `27`, `1`<<`6`, `28`, `1`<<`6`, `29`, `1`<<`6`, `30`, `1`<<`6`, `31`, `1`<<`6`, `32`, `1`<<`6`);
498	let b = _mm512_set1_epi8(`100`);
499	let r = _mm512_permutex2var_epi8(a, idx, b);
500	#[rustfmt::skip]
501	let e = _mm512_set_epi8(
502	`62`, `100`, `61`, `100`, `60`, `100`, `59`, `100`, `58`, `100`, `57`, `100`, `56`, `100`, `55`, `100`,
503	`54`, `100`, `53`, `100`, `52`, `100`, `51`, `100`, `50`, `100`, `49`, `100`, `48`, `100`, `47`, `100`,
504	`46`, `100`, `45`, `100`, `44`, `100`, `43`, `100`, `42`, `100`, `41`, `100`, `40`, `100`, `39`, `100`,
505	`38`, `100`, `37`, `100`, `36`, `100`, `35`, `100`, `34`, `100`, `33`, `100`, `32`, `100`, `31`, `100`,
506	);
507	assert_eq_m512i(r, e);
508	}
509
510	#[simd_test(enable = "avx512vbmi")]
511	unsafe fn test_mm512_mask_permutex2var_epi8() {
512	#[rustfmt::skip]
513	let a = _mm512_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
514	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
515	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`, `42`, `43`, `44`, `45`, `46`, `47`,
516	`48`, `49`, `50`, `51`, `52`, `53`, `54`, `55`, `56`, `57`, `58`, `59`, `60`, `61`, `62`, `63`);
517	#[rustfmt::skip]
518	let idx = _mm512_set_epi8(`1`, `1`<<`6`, `2`, `1`<<`6`, `3`, `1`<<`6`, `4`, `1`<<`6`, `5`, `1`<<`6`, `6`, `1`<<`6`, `7`, `1`<<`6`, `8`, `1`<<`6`,
519	`9`, `1`<<`6`, `10`, `1`<<`6`, `11`, `1`<<`6`, `12`, `1`<<`6`, `13`, `1`<<`6`, `14`, `1`<<`6`, `15`, `1`<<`6`, `16`, `1`<<`6`,
520	`17`, `1`<<`6`, `18`, `1`<<`6`, `19`, `1`<<`6`, `20`, `1`<<`6`, `21`, `1`<<`6`, `22`, `1`<<`6`, `23`, `1`<<`6`, `24`, `1`<<`6`,
521	`25`, `1`<<`6`, `26`, `1`<<`6`, `27`, `1`<<`6`, `28`, `1`<<`6`, `29`, `1`<<`6`, `30`, `1`<<`6`, `31`, `1`<<`6`, `32`, `1`<<`6`);
522	let b = _mm512_set1_epi8(`100`);
523	let r = _mm512_mask_permutex2var_epi8(a, `0`, idx, b);
524	assert_eq_m512i(r, a);
525	let r = _mm512_mask_permutex2var_epi8(
526	a,
527	`0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111`,
528	idx,
529	b,
530	);
531	#[rustfmt::skip]
532	let e = _mm512_set_epi8(
533	`62`, `100`, `61`, `100`, `60`, `100`, `59`, `100`, `58`, `100`, `57`, `100`, `56`, `100`, `55`, `100`,
534	`54`, `100`, `53`, `100`, `52`, `100`, `51`, `100`, `50`, `100`, `49`, `100`, `48`, `100`, `47`, `100`,
535	`46`, `100`, `45`, `100`, `44`, `100`, `43`, `100`, `42`, `100`, `41`, `100`, `40`, `100`, `39`, `100`,
536	`38`, `100`, `37`, `100`, `36`, `100`, `35`, `100`, `34`, `100`, `33`, `100`, `32`, `100`, `31`, `100`,
537	);
538	assert_eq_m512i(r, e);
539	}
540
541	#[simd_test(enable = "avx512vbmi")]
542	unsafe fn test_mm512_maskz_permutex2var_epi8() {
543	#[rustfmt::skip]
544	let a = _mm512_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
545	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
546	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`, `42`, `43`, `44`, `45`, `46`, `47`,
547	`48`, `49`, `50`, `51`, `52`, `53`, `54`, `55`, `56`, `57`, `58`, `59`, `60`, `61`, `62`, `63`);
548	#[rustfmt::skip]
549	let idx = _mm512_set_epi8(`1`, `1`<<`6`, `2`, `1`<<`6`, `3`, `1`<<`6`, `4`, `1`<<`6`, `5`, `1`<<`6`, `6`, `1`<<`6`, `7`, `1`<<`6`, `8`, `1`<<`6`,
550	`9`, `1`<<`6`, `10`, `1`<<`6`, `11`, `1`<<`6`, `12`, `1`<<`6`, `13`, `1`<<`6`, `14`, `1`<<`6`, `15`, `1`<<`6`, `16`, `1`<<`6`,
551	`17`, `1`<<`6`, `18`, `1`<<`6`, `19`, `1`<<`6`, `20`, `1`<<`6`, `21`, `1`<<`6`, `22`, `1`<<`6`, `23`, `1`<<`6`, `24`, `1`<<`6`,
552	`25`, `1`<<`6`, `26`, `1`<<`6`, `27`, `1`<<`6`, `28`, `1`<<`6`, `29`, `1`<<`6`, `30`, `1`<<`6`, `31`, `1`<<`6`, `32`, `1`<<`6`);
553	let b = _mm512_set1_epi8(`100`);
554	let r = _mm512_maskz_permutex2var_epi8(`0`, a, idx, b);
555	assert_eq_m512i(r, _mm512_setzero_si512());
556	let r = _mm512_maskz_permutex2var_epi8(
557	`0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111`,
558	a,
559	idx,
560	b,
561	);
562	#[rustfmt::skip]
563	let e = _mm512_set_epi8(
564	`62`, `100`, `61`, `100`, `60`, `100`, `59`, `100`, `58`, `100`, `57`, `100`, `56`, `100`, `55`, `100`,
565	`54`, `100`, `53`, `100`, `52`, `100`, `51`, `100`, `50`, `100`, `49`, `100`, `48`, `100`, `47`, `100`,
566	`46`, `100`, `45`, `100`, `44`, `100`, `43`, `100`, `42`, `100`, `41`, `100`, `40`, `100`, `39`, `100`,
567	`38`, `100`, `37`, `100`, `36`, `100`, `35`, `100`, `34`, `100`, `33`, `100`, `32`, `100`, `31`, `100`,
568	);
569	assert_eq_m512i(r, e);
570	}
571
572	#[simd_test(enable = "avx512vbmi")]
573	unsafe fn test_mm512_mask2_permutex2var_epi8() {
574	#[rustfmt::skip]
575	let a = _mm512_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
576	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
577	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`, `42`, `43`, `44`, `45`, `46`, `47`,
578	`48`, `49`, `50`, `51`, `52`, `53`, `54`, `55`, `56`, `57`, `58`, `59`, `60`, `61`, `62`, `63`);
579	#[rustfmt::skip]
580	let idx = _mm512_set_epi8(`1`, `1`<<`6`, `2`, `1`<<`6`, `3`, `1`<<`6`, `4`, `1`<<`6`, `5`, `1`<<`6`, `6`, `1`<<`6`, `7`, `1`<<`6`, `8`, `1`<<`6`,
581	`9`, `1`<<`6`, `10`, `1`<<`6`, `11`, `1`<<`6`, `12`, `1`<<`6`, `13`, `1`<<`6`, `14`, `1`<<`6`, `15`, `1`<<`6`, `16`, `1`<<`6`,
582	`17`, `1`<<`6`, `18`, `1`<<`6`, `19`, `1`<<`6`, `20`, `1`<<`6`, `21`, `1`<<`6`, `22`, `1`<<`6`, `23`, `1`<<`6`, `24`, `1`<<`6`,
583	`25`, `1`<<`6`, `26`, `1`<<`6`, `27`, `1`<<`6`, `28`, `1`<<`6`, `29`, `1`<<`6`, `30`, `1`<<`6`, `31`, `1`<<`6`, `32`, `1`<<`6`);
584	let b = _mm512_set1_epi8(`100`);
585	let r = _mm512_mask2_permutex2var_epi8(a, idx, `0`, b);
586	assert_eq_m512i(r, idx);
587	let r = _mm512_mask2_permutex2var_epi8(
588	a,
589	idx,
590	`0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111`,
591	b,
592	);
593	#[rustfmt::skip]
594	let e = _mm512_set_epi8(
595	`62`, `100`, `61`, `100`, `60`, `100`, `59`, `100`, `58`, `100`, `57`, `100`, `56`, `100`, `55`, `100`,
596	`54`, `100`, `53`, `100`, `52`, `100`, `51`, `100`, `50`, `100`, `49`, `100`, `48`, `100`, `47`, `100`,
597	`46`, `100`, `45`, `100`, `44`, `100`, `43`, `100`, `42`, `100`, `41`, `100`, `40`, `100`, `39`, `100`,
598	`38`, `100`, `37`, `100`, `36`, `100`, `35`, `100`, `34`, `100`, `33`, `100`, `32`, `100`, `31`, `100`,
599	);
600	assert_eq_m512i(r, e);
601	}
602
603	#[simd_test(enable = "avx512vbmi,avx512vl")]
604	unsafe fn test_mm256_permutex2var_epi8() {
605	#[rustfmt::skip]
606	let a = _mm256_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
607	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`);
608	#[rustfmt::skip]
609	let idx = _mm256_set_epi8(`1`, `1`<<`5`, `2`, `1`<<`5`, `3`, `1`<<`5`, `4`, `1`<<`5`, `5`, `1`<<`5`, `6`, `1`<<`5`, `7`, `1`<<`5`, `8`, `1`<<`5`,
610	`9`, `1`<<`5`, `10`, `1`<<`5`, `11`, `1`<<`5`, `12`, `1`<<`5`, `13`, `1`<<`5`, `14`, `1`<<`5`, `15`, `1`<<`5`, `16`, `1`<<`5`);
611	let b = _mm256_set1_epi8(`100`);
612	let r = _mm256_permutex2var_epi8(a, idx, b);
613	#[rustfmt::skip]
614	let e = _mm256_set_epi8(
615	`30`, `100`, `29`, `100`, `28`, `100`, `27`, `100`, `26`, `100`, `25`, `100`, `24`, `100`, `23`, `100`,
616	`22`, `100`, `21`, `100`, `20`, `100`, `19`, `100`, `18`, `100`, `17`, `100`, `16`, `100`, `15`, `100`,
617	);
618	assert_eq_m256i(r, e);
619	}
620
621	#[simd_test(enable = "avx512vbmi,avx512vl")]
622	unsafe fn test_mm256_mask_permutex2var_epi8() {
623	#[rustfmt::skip]
624	let a = _mm256_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
625	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`);
626	#[rustfmt::skip]
627	let idx = _mm256_set_epi8(`1`, `1`<<`5`, `2`, `1`<<`5`, `3`, `1`<<`5`, `4`, `1`<<`5`, `5`, `1`<<`5`, `6`, `1`<<`5`, `7`, `1`<<`5`, `8`, `1`<<`5`,
628	`9`, `1`<<`5`, `10`, `1`<<`5`, `11`, `1`<<`5`, `12`, `1`<<`5`, `13`, `1`<<`5`, `14`, `1`<<`5`, `15`, `1`<<`5`, `16`, `1`<<`5`);
629	let b = _mm256_set1_epi8(`100`);
630	let r = _mm256_mask_permutex2var_epi8(a, `0`, idx, b);
631	assert_eq_m256i(r, a);
632	let r = _mm256_mask_permutex2var_epi8(a, `0b11111111_11111111_11111111_11111111`, idx, b);
633	#[rustfmt::skip]
634	let e = _mm256_set_epi8(
635	`30`, `100`, `29`, `100`, `28`, `100`, `27`, `100`, `26`, `100`, `25`, `100`, `24`, `100`, `23`, `100`,
636	`22`, `100`, `21`, `100`, `20`, `100`, `19`, `100`, `18`, `100`, `17`, `100`, `16`, `100`, `15`, `100`,
637	);
638	assert_eq_m256i(r, e);
639	}
640
641	#[simd_test(enable = "avx512vbmi,avx512vl")]
642	unsafe fn test_mm256_maskz_permutex2var_epi8() {
643	#[rustfmt::skip]
644	let a = _mm256_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
645	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`);
646	#[rustfmt::skip]
647	let idx = _mm256_set_epi8(`1`, `1`<<`5`, `2`, `1`<<`5`, `3`, `1`<<`5`, `4`, `1`<<`5`, `5`, `1`<<`5`, `6`, `1`<<`5`, `7`, `1`<<`5`, `8`, `1`<<`5`,
648	`9`, `1`<<`5`, `10`, `1`<<`5`, `11`, `1`<<`5`, `12`, `1`<<`5`, `13`, `1`<<`5`, `14`, `1`<<`5`, `15`, `1`<<`5`, `16`, `1`<<`5`);
649	let b = _mm256_set1_epi8(`100`);
650	let r = _mm256_maskz_permutex2var_epi8(`0`, a, idx, b);
651	assert_eq_m256i(r, _mm256_setzero_si256());
652	let r = _mm256_maskz_permutex2var_epi8(`0b11111111_11111111_11111111_11111111`, a, idx, b);
653	#[rustfmt::skip]
654	let e = _mm256_set_epi8(
655	`30`, `100`, `29`, `100`, `28`, `100`, `27`, `100`, `26`, `100`, `25`, `100`, `24`, `100`, `23`, `100`,
656	`22`, `100`, `21`, `100`, `20`, `100`, `19`, `100`, `18`, `100`, `17`, `100`, `16`, `100`, `15`, `100`,
657	);
658	assert_eq_m256i(r, e);
659	}
660
661	#[simd_test(enable = "avx512vbmi,avx512vl")]
662	unsafe fn test_mm256_mask2_permutex2var_epi8() {
663	#[rustfmt::skip]
664	let a = _mm256_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
665	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`);
666	#[rustfmt::skip]
667	let idx = _mm256_set_epi8(`1`, `1`<<`5`, `2`, `1`<<`5`, `3`, `1`<<`5`, `4`, `1`<<`5`, `5`, `1`<<`5`, `6`, `1`<<`5`, `7`, `1`<<`5`, `8`, `1`<<`5`,
668	`9`, `1`<<`5`, `10`, `1`<<`5`, `11`, `1`<<`5`, `12`, `1`<<`5`, `13`, `1`<<`5`, `14`, `1`<<`5`, `15`, `1`<<`5`, `16`, `1`<<`5`);
669	let b = _mm256_set1_epi8(`100`);
670	let r = _mm256_mask2_permutex2var_epi8(a, idx, `0`, b);
671	assert_eq_m256i(r, idx);
672	let r = _mm256_mask2_permutex2var_epi8(a, idx, `0b11111111_11111111_11111111_11111111`, b);
673	#[rustfmt::skip]
674	let e = _mm256_set_epi8(
675	`30`, `100`, `29`, `100`, `28`, `100`, `27`, `100`, `26`, `100`, `25`, `100`, `24`, `100`, `23`, `100`,
676	`22`, `100`, `21`, `100`, `20`, `100`, `19`, `100`, `18`, `100`, `17`, `100`, `16`, `100`, `15`, `100`,
677	);
678	assert_eq_m256i(r, e);
679	}
680
681	#[simd_test(enable = "avx512vbmi,avx512vl")]
682	unsafe fn test_mm_permutex2var_epi8() {
683	let a = _mm_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`);
684	#[rustfmt::skip]
685	let idx = _mm_set_epi8(`1`, `1` << `4`, `2`, `1` << `4`, `3`, `1` << `4`, `4`, `1` << `4`, `5`, `1` << `4`, `6`, `1` << `4`, `7`, `1` << `4`, `8`, `1` << `4`);
686	let b = _mm_set1_epi8(`100`);
687	let r = _mm_permutex2var_epi8(a, idx, b);
688	let e = _mm_set_epi8(
689	`14`, `100`, `13`, `100`, `12`, `100`, `11`, `100`, `10`, `100`, `9`, `100`, `8`, `100`, `7`, `100`,
690	);
691	assert_eq_m128i(r, e);
692	}
693
694	#[simd_test(enable = "avx512vbmi,avx512vl")]
695	unsafe fn test_mm_mask_permutex2var_epi8() {
696	let a = _mm_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`);
697	#[rustfmt::skip]
698	let idx = _mm_set_epi8(`1`, `1` << `4`, `2`, `1` << `4`, `3`, `1` << `4`, `4`, `1` << `4`, `5`, `1` << `4`, `6`, `1` << `4`, `7`, `1` << `4`, `8`, `1` << `4`);
699	let b = _mm_set1_epi8(`100`);
700	let r = _mm_mask_permutex2var_epi8(a, `0`, idx, b);
701	assert_eq_m128i(r, a);
702	let r = _mm_mask_permutex2var_epi8(a, `0b11111111_11111111`, idx, b);
703	let e = _mm_set_epi8(
704	`14`, `100`, `13`, `100`, `12`, `100`, `11`, `100`, `10`, `100`, `9`, `100`, `8`, `100`, `7`, `100`,
705	);
706	assert_eq_m128i(r, e);
707	}
708
709	#[simd_test(enable = "avx512vbmi,avx512vl")]
710	unsafe fn test_mm_maskz_permutex2var_epi8() {
711	let a = _mm_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`);
712	#[rustfmt::skip]
713	let idx = _mm_set_epi8(`1`, `1` << `4`, `2`, `1` << `4`, `3`, `1` << `4`, `4`, `1` << `4`, `5`, `1` << `4`, `6`, `1` << `4`, `7`, `1` << `4`, `8`, `1` << `4`);
714	let b = _mm_set1_epi8(`100`);
715	let r = _mm_maskz_permutex2var_epi8(`0`, a, idx, b);
716	assert_eq_m128i(r, _mm_setzero_si128());
717	let r = _mm_maskz_permutex2var_epi8(`0b11111111_11111111`, a, idx, b);
718	let e = _mm_set_epi8(
719	`14`, `100`, `13`, `100`, `12`, `100`, `11`, `100`, `10`, `100`, `9`, `100`, `8`, `100`, `7`, `100`,
720	);
721	assert_eq_m128i(r, e);
722	}
723
724	#[simd_test(enable = "avx512vbmi,avx512vl")]
725	unsafe fn test_mm_mask2_permutex2var_epi8() {
726	let a = _mm_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`);
727	#[rustfmt::skip]
728	let idx = _mm_set_epi8(`1`, `1` << `4`, `2`, `1` << `4`, `3`, `1` << `4`, `4`, `1` << `4`, `5`, `1` << `4`, `6`, `1` << `4`, `7`, `1` << `4`, `8`, `1` << `4`);
729	let b = _mm_set1_epi8(`100`);
730	let r = _mm_mask2_permutex2var_epi8(a, idx, `0`, b);
731	assert_eq_m128i(r, idx);
732	let r = _mm_mask2_permutex2var_epi8(a, idx, `0b11111111_11111111`, b);
733	let e = _mm_set_epi8(
734	`14`, `100`, `13`, `100`, `12`, `100`, `11`, `100`, `10`, `100`, `9`, `100`, `8`, `100`, `7`, `100`,
735	);
736	assert_eq_m128i(r, e);
737	}
738
739	#[simd_test(enable = "avx512vbmi")]
740	unsafe fn test_mm512_permutexvar_epi8() {
741	let idx = _mm512_set1_epi8(`1`);
742	#[rustfmt::skip]
743	let a = _mm512_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
744	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
745	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`, `42`, `43`, `44`, `45`, `46`, `47`,
746	`48`, `49`, `50`, `51`, `52`, `53`, `54`, `55`, `56`, `57`, `58`, `59`, `60`, `61`, `62`, `63`);
747	let r = _mm512_permutexvar_epi8(idx, a);
748	let e = _mm512_set1_epi8(`62`);
749	assert_eq_m512i(r, e);
750	}
751
752	#[simd_test(enable = "avx512vbmi")]
753	unsafe fn test_mm512_mask_permutexvar_epi8() {
754	let idx = _mm512_set1_epi8(`1`);
755	#[rustfmt::skip]
756	let a = _mm512_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
757	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
758	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`, `42`, `43`, `44`, `45`, `46`, `47`,
759	`48`, `49`, `50`, `51`, `52`, `53`, `54`, `55`, `56`, `57`, `58`, `59`, `60`, `61`, `62`, `63`);
760	let r = _mm512_mask_permutexvar_epi8(a, `0`, idx, a);
761	assert_eq_m512i(r, a);
762	let r = _mm512_mask_permutexvar_epi8(
763	a,
764	`0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111`,
765	idx,
766	a,
767	);
768	let e = _mm512_set1_epi8(`62`);
769	assert_eq_m512i(r, e);
770	}
771
772	#[simd_test(enable = "avx512vbmi")]
773	unsafe fn test_mm512_maskz_permutexvar_epi8() {
774	let idx = _mm512_set1_epi8(`1`);
775	#[rustfmt::skip]
776	let a = _mm512_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
777	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
778	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`, `42`, `43`, `44`, `45`, `46`, `47`,
779	`48`, `49`, `50`, `51`, `52`, `53`, `54`, `55`, `56`, `57`, `58`, `59`, `60`, `61`, `62`, `63`);
780	let r = _mm512_maskz_permutexvar_epi8(`0`, idx, a);
781	assert_eq_m512i(r, _mm512_setzero_si512());
782	let r = _mm512_maskz_permutexvar_epi8(
783	`0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111`,
784	idx,
785	a,
786	);
787	let e = _mm512_set1_epi8(`62`);
788	assert_eq_m512i(r, e);
789	}
790
791	#[simd_test(enable = "avx512vbmi,avx512vl")]
792	unsafe fn test_mm256_permutexvar_epi8() {
793	let idx = _mm256_set1_epi8(`1`);
794	#[rustfmt::skip]
795	let a = _mm256_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
796	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`);
797	let r = _mm256_permutexvar_epi8(idx, a);
798	let e = _mm256_set1_epi8(`30`);
799	assert_eq_m256i(r, e);
800	}
801
802	#[simd_test(enable = "avx512vbmi,avx512vl")]
803	unsafe fn test_mm256_mask_permutexvar_epi8() {
804	let idx = _mm256_set1_epi8(`1`);
805	#[rustfmt::skip]
806	let a = _mm256_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
807	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`);
808	let r = _mm256_mask_permutexvar_epi8(a, `0`, idx, a);
809	assert_eq_m256i(r, a);
810	let r = _mm256_mask_permutexvar_epi8(a, `0b11111111_11111111_11111111_11111111`, idx, a);
811	let e = _mm256_set1_epi8(`30`);
812	assert_eq_m256i(r, e);
813	}
814
815	#[simd_test(enable = "avx512vbmi,avx512vl")]
816	unsafe fn test_mm256_maskz_permutexvar_epi8() {
817	let idx = _mm256_set1_epi8(`1`);
818	#[rustfmt::skip]
819	let a = _mm256_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
820	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`);
821	let r = _mm256_maskz_permutexvar_epi8(`0`, idx, a);
822	assert_eq_m256i(r, _mm256_setzero_si256());
823	let r = _mm256_maskz_permutexvar_epi8(`0b11111111_11111111_11111111_11111111`, idx, a);
824	let e = _mm256_set1_epi8(`30`);
825	assert_eq_m256i(r, e);
826	}
827
828	#[simd_test(enable = "avx512vbmi,avx512vl")]
829	unsafe fn test_mm_permutexvar_epi8() {
830	let idx = _mm_set1_epi8(`1`);
831	let a = _mm_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`);
832	let r = _mm_permutexvar_epi8(idx, a);
833	let e = _mm_set1_epi8(`14`);
834	assert_eq_m128i(r, e);
835	}
836
837	#[simd_test(enable = "avx512vbmi,avx512vl")]
838	unsafe fn test_mm_mask_permutexvar_epi8() {
839	let idx = _mm_set1_epi8(`1`);
840	let a = _mm_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`);
841	let r = _mm_mask_permutexvar_epi8(a, `0`, idx, a);
842	assert_eq_m128i(r, a);
843	let r = _mm_mask_permutexvar_epi8(a, `0b11111111_11111111`, idx, a);
844	let e = _mm_set1_epi8(`14`);
845	assert_eq_m128i(r, e);
846	}
847
848	#[simd_test(enable = "avx512vbmi,avx512vl")]
849	unsafe fn test_mm_maskz_permutexvar_epi8() {
850	let idx = _mm_set1_epi8(`1`);
851	let a = _mm_set_epi8(`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`);
852	let r = _mm_maskz_permutexvar_epi8(`0`, idx, a);
853	assert_eq_m128i(r, _mm_setzero_si128());
854	let r = _mm_maskz_permutexvar_epi8(`0b11111111_11111111`, idx, a);
855	let e = _mm_set1_epi8(`14`);
856	assert_eq_m128i(r, e);
857	}
858
859	#[simd_test(enable = "avx512vbmi")]
860	unsafe fn test_mm512_multishift_epi64_epi8() {
861	let a = _mm512_set1_epi8(`1`);
862	let b = _mm512_set1_epi8(`1`);
863	let r = _mm512_multishift_epi64_epi8(a, b);
864	let e = _mm512_set1_epi8(`1` << `7`);
865	assert_eq_m512i(r, e);
866	}
867
868	#[simd_test(enable = "avx512vbmi")]
869	unsafe fn test_mm512_mask_multishift_epi64_epi8() {
870	let a = _mm512_set1_epi8(`1`);
871	let b = _mm512_set1_epi8(`1`);
872	let r = _mm512_mask_multishift_epi64_epi8(a, `0`, a, b);
873	assert_eq_m512i(r, a);
874	let r = _mm512_mask_multishift_epi64_epi8(
875	a,
876	`0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111`,
877	a,
878	b,
879	);
880	let e = _mm512_set1_epi8(`1` << `7`);
881	assert_eq_m512i(r, e);
882	}
883
884	#[simd_test(enable = "avx512vbmi")]
885	unsafe fn test_mm512_maskz_multishift_epi64_epi8() {
886	let a = _mm512_set1_epi8(`1`);
887	let b = _mm512_set1_epi8(`1`);
888	let r = _mm512_maskz_multishift_epi64_epi8(`0`, a, b);
889	assert_eq_m512i(r, _mm512_setzero_si512());
890	let r = _mm512_maskz_multishift_epi64_epi8(
891	`0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111`,
892	a,
893	b,
894	);
895	let e = _mm512_set1_epi8(`1` << `7`);
896	assert_eq_m512i(r, e);
897	}
898
899	#[simd_test(enable = "avx512vbmi,avx512vl")]
900	unsafe fn test_mm256_multishift_epi64_epi8() {
901	let a = _mm256_set1_epi8(`1`);
902	let b = _mm256_set1_epi8(`1`);
903	let r = _mm256_multishift_epi64_epi8(a, b);
904	let e = _mm256_set1_epi8(`1` << `7`);
905	assert_eq_m256i(r, e);
906	}
907
908	#[simd_test(enable = "avx512vbmi,avx512vl")]
909	unsafe fn test_mm256_mask_multishift_epi64_epi8() {
910	let a = _mm256_set1_epi8(`1`);
911	let b = _mm256_set1_epi8(`1`);
912	let r = _mm256_mask_multishift_epi64_epi8(a, `0`, a, b);
913	assert_eq_m256i(r, a);
914	let r = _mm256_mask_multishift_epi64_epi8(a, `0b11111111_11111111_11111111_11111111`, a, b);
915	let e = _mm256_set1_epi8(`1` << `7`);
916	assert_eq_m256i(r, e);
917	}
918
919	#[simd_test(enable = "avx512vbmi,avx512vl")]
920	unsafe fn test_mm256_maskz_multishift_epi64_epi8() {
921	let a = _mm256_set1_epi8(`1`);
922	let b = _mm256_set1_epi8(`1`);
923	let r = _mm256_maskz_multishift_epi64_epi8(`0`, a, b);
924	assert_eq_m256i(r, _mm256_setzero_si256());
925	let r = _mm256_maskz_multishift_epi64_epi8(`0b11111111_11111111_11111111_11111111`, a, b);
926	let e = _mm256_set1_epi8(`1` << `7`);
927	assert_eq_m256i(r, e);
928	}
929
930	#[simd_test(enable = "avx512vbmi,avx512vl")]
931	unsafe fn test_mm_multishift_epi64_epi8() {
932	let a = _mm_set1_epi8(`1`);
933	let b = _mm_set1_epi8(`1`);
934	let r = _mm_multishift_epi64_epi8(a, b);
935	let e = _mm_set1_epi8(`1` << `7`);
936	assert_eq_m128i(r, e);
937	}
938
939	#[simd_test(enable = "avx512vbmi,avx512vl")]
940	unsafe fn test_mm_mask_multishift_epi64_epi8() {
941	let a = _mm_set1_epi8(`1`);
942	let b = _mm_set1_epi8(`1`);
943	let r = _mm_mask_multishift_epi64_epi8(a, `0`, a, b);
944	assert_eq_m128i(r, a);
945	let r = _mm_mask_multishift_epi64_epi8(a, `0b11111111_11111111`, a, b);
946	let e = _mm_set1_epi8(`1` << `7`);
947	assert_eq_m128i(r, e);
948	}
949
950	#[simd_test(enable = "avx512vbmi,avx512vl")]
951	unsafe fn test_mm_maskz_multishift_epi64_epi8() {
952	let a = _mm_set1_epi8(`1`);
953	let b = _mm_set1_epi8(`1`);
954	let r = _mm_maskz_multishift_epi64_epi8(`0`, a, b);
955	assert_eq_m128i(r, _mm_setzero_si128());
956	let r = _mm_maskz_multishift_epi64_epi8(`0b11111111_11111111`, a, b);
957	let e = _mm_set1_epi8(`1` << `7`);
958	assert_eq_m128i(r, e);
959	}
960	}
961