use crate::core_arch::{simd::*, simd_llvm::*, x86::*};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi8&expand=4262)
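///
/// # Example
///
/// A minimal illustrative sketch, not compiled as a doctest (it assumes a CPU and
/// toolchain with AVX512VBMI support). Index bytes 0..=63 select a lane of `a`;
/// setting bit 6 (adding 64) selects the same lane of `b` instead:
///
/// ```ignore
/// let a = _mm512_set1_epi8(11);
/// let b = _mm512_set1_epi8(22);
/// let idx = _mm512_set1_epi8(1 << 6); // every output lane reads lane 0 of `b`
/// let r = _mm512_permutex2var_epi8(a, idx, b);
/// // r now equals _mm512_set1_epi8(22)
/// ```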
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm512_permutex2var_epi8(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    transmute(vpermi2b(a.as_i8x64(), idx.as_i8x64(), b.as_i8x64()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi8&expand=4259)
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[cfg_attr(test, assert_instr(vpermt2b))]
pub unsafe fn _mm512_mask_permutex2var_epi8(
    a: __m512i,
    k: __mmask64,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    let permute: i8x64 = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
    transmute(simd_select_bitmask(k, permute, a.as_i8x64()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi8&expand=4261)
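///
/// # Example
///
/// A minimal illustrative sketch, not compiled as a doctest (it assumes AVX512VBMI
/// support). Lanes whose mask bit is clear are zeroed instead of shuffled:
///
/// ```ignore
/// let a = _mm512_set1_epi8(11);
/// let b = _mm512_set1_epi8(22);
/// let idx = _mm512_set1_epi8(0);
/// let r = _mm512_maskz_permutex2var_epi8(0, a, idx, b); // all-zero mask
/// // r now equals _mm512_setzero_si512()
/// ```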
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm512_maskz_permutex2var_epi8(
    k: __mmask64,
    a: __m512i,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    let permute: i8x64 = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
    let zero: i8x64 = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi8&expand=4260)
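///
/// # Example
///
/// A minimal illustrative sketch, not compiled as a doctest (it assumes AVX512VBMI
/// support). Unlike the `_mask_` form, lanes whose mask bit is clear keep the value
/// from `idx` rather than from `a`:
///
/// ```ignore
/// let a = _mm512_set1_epi8(11);
/// let b = _mm512_set1_epi8(22);
/// let idx = _mm512_set1_epi8(1);
/// let r = _mm512_mask2_permutex2var_epi8(a, idx, 0, b); // all-zero mask
/// // r is now a copy of idx
/// ```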
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[cfg_attr(test, assert_instr(vpermi2b))]
pub unsafe fn _mm512_mask2_permutex2var_epi8(
    a: __m512i,
    idx: __m512i,
    k: __mmask64,
    b: __m512i,
) -> __m512i {
    let permute: i8x64 = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
    transmute(simd_select_bitmask(k, permute, idx.as_i8x64()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi8&expand=4258)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm256_permutex2var_epi8(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    transmute(vpermi2b256(a.as_i8x32(), idx.as_i8x32(), b.as_i8x32()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi8&expand=4255)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpermt2b))]
pub unsafe fn _mm256_mask_permutex2var_epi8(
    a: __m256i,
    k: __mmask32,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    let permute: i8x32 = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
    transmute(simd_select_bitmask(k, permute, a.as_i8x32()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi8&expand=4257)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm256_maskz_permutex2var_epi8(
    k: __mmask32,
    a: __m256i,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    let permute: i8x32 = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
    let zero: i8x32 = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi8&expand=4256)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpermi2b))]
pub unsafe fn _mm256_mask2_permutex2var_epi8(
    a: __m256i,
    idx: __m256i,
    k: __mmask32,
    b: __m256i,
) -> __m256i {
    let permute: i8x32 = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
    transmute(simd_select_bitmask(k, permute, idx.as_i8x32()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi8&expand=4254)
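///
/// # Example
///
/// A minimal illustrative sketch, not compiled as a doctest (it assumes AVX512VBMI
/// and AVX512VL support). With 16-byte vectors only the low 5 index bits matter:
/// values 0..=15 pick a lane of `a`, 16..=31 pick a lane of `b`:
///
/// ```ignore
/// let a = _mm_set1_epi8(11);
/// let b = _mm_set1_epi8(22);
/// let idx = _mm_set1_epi8(1 << 4); // every output lane reads lane 0 of `b`
/// let r = _mm_permutex2var_epi8(a, idx, b);
/// // r now equals _mm_set1_epi8(22)
/// ```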
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm_permutex2var_epi8(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    transmute(vpermi2b128(a.as_i8x16(), idx.as_i8x16(), b.as_i8x16()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi8&expand=4251)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpermt2b))]
pub unsafe fn _mm_mask_permutex2var_epi8(
    a: __m128i,
    k: __mmask16,
    idx: __m128i,
    b: __m128i,
) -> __m128i {
    let permute: i8x16 = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
    transmute(simd_select_bitmask(k, permute, a.as_i8x16()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi8&expand=4253)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm_maskz_permutex2var_epi8(
    k: __mmask16,
    a: __m128i,
    idx: __m128i,
    b: __m128i,
) -> __m128i {
    let permute: i8x16 = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
    let zero: i8x16 = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi8&expand=4252)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpermi2b))]
pub unsafe fn _mm_mask2_permutex2var_epi8(
    a: __m128i,
    idx: __m128i,
    k: __mmask16,
    b: __m128i,
) -> __m128i {
    let permute: i8x16 = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
    transmute(simd_select_bitmask(k, permute, idx.as_i8x16()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi8&expand=4316)
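///
/// # Example
///
/// A minimal illustrative sketch, not compiled as a doctest (it assumes AVX512VBMI
/// support). Each index byte, taken modulo 64, picks the lane of `a` that is copied
/// into the corresponding output lane, so a constant index broadcasts a single byte:
///
/// ```ignore
/// let a = _mm512_set1_epi8(42);
/// let idx = _mm512_set1_epi8(3); // broadcast lane 3 of `a` to every lane
/// let r = _mm512_permutexvar_epi8(idx, a);
/// // r now equals _mm512_set1_epi8(42)
/// ```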
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm512_permutexvar_epi8(idx: __m512i, a: __m512i) -> __m512i {
    transmute(vpermb(a.as_i8x64(), idx.as_i8x64()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi8&expand=4314)
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm512_mask_permutexvar_epi8(
    src: __m512i,
    k: __mmask64,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    let permute: i8x64 = _mm512_permutexvar_epi8(idx, a).as_i8x64();
    transmute(simd_select_bitmask(k, permute, src.as_i8x64()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi8&expand=4315)
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm512_maskz_permutexvar_epi8(k: __mmask64, idx: __m512i, a: __m512i) -> __m512i {
    let permute: i8x64 = _mm512_permutexvar_epi8(idx, a).as_i8x64();
    let zero: i8x64 = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi8&expand=4313)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm256_permutexvar_epi8(idx: __m256i, a: __m256i) -> __m256i {
    transmute(vpermb256(a.as_i8x32(), idx.as_i8x32()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi8&expand=4311)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm256_mask_permutexvar_epi8(
    src: __m256i,
    k: __mmask32,
    idx: __m256i,
    a: __m256i,
) -> __m256i {
    let permute: i8x32 = _mm256_permutexvar_epi8(idx, a).as_i8x32();
    transmute(simd_select_bitmask(k, permute, src.as_i8x32()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi8&expand=4312)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm256_maskz_permutexvar_epi8(k: __mmask32, idx: __m256i, a: __m256i) -> __m256i {
    let permute: i8x32 = _mm256_permutexvar_epi8(idx, a).as_i8x32();
    let zero: i8x32 = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi8&expand=4310)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm_permutexvar_epi8(idx: __m128i, a: __m128i) -> __m128i {
    transmute(vpermb128(a.as_i8x16(), idx.as_i8x16()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi8&expand=4308)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm_mask_permutexvar_epi8(
    src: __m128i,
    k: __mmask16,
    idx: __m128i,
    a: __m128i,
) -> __m128i {
    let permute: i8x16 = _mm_permutexvar_epi8(idx, a).as_i8x16();
    transmute(simd_select_bitmask(k, permute, src.as_i8x16()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi8&expand=4309)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm_maskz_permutexvar_epi8(k: __mmask16, idx: __m128i, a: __m128i) -> __m128i {
    let permute: i8x16 = _mm_permutexvar_epi8(idx, a).as_i8x16();
    let zero: i8x16 = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_multishift_epi64_epi8&expand=4026)
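///
/// # Example
///
/// A minimal illustrative sketch, not compiled as a doctest (it assumes AVX512VBMI
/// support). Each control byte in `a`, taken modulo 64, gives a bit offset into the
/// matching 64-bit lane of `b`; 8 bits are read from that offset, wrapping around
/// within the lane:
///
/// ```ignore
/// let a = _mm512_set1_epi8(1); // read every byte field starting one bit in
/// let b = _mm512_set1_epi8(1); // every 64-bit lane is 0x0101_0101_0101_0101
/// let r = _mm512_multishift_epi64_epi8(a, b);
/// // each extracted field is 0b1000_0000, so r equals _mm512_set1_epi8(1 << 7)
/// ```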
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm512_multishift_epi64_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmultishiftqb(a.as_i8x64(), b.as_i8x64()))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_multishift_epi64_epi8&expand=4024)
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm512_mask_multishift_epi64_epi8(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let multishift: i8x64 = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, multishift, src.as_i8x64()))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_multishift_epi64_epi8&expand=4025)
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm512_maskz_multishift_epi64_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let multishift: i8x64 = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
    let zero: i8x64 = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, multishift, zero))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_multishift_epi64_epi8&expand=4023)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm256_multishift_epi64_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(vpmultishiftqb256(a.as_i8x32(), b.as_i8x32()))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_multishift_epi64_epi8&expand=4021)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm256_mask_multishift_epi64_epi8(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let multishift: i8x32 = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, multishift, src.as_i8x32()))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_multishift_epi64_epi8&expand=4022)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm256_maskz_multishift_epi64_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let multishift: i8x32 = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
    let zero: i8x32 = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, multishift, zero))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm_multishift_epi64_epi8&expand=4020)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm_multishift_epi64_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(vpmultishiftqb128(a.as_i8x16(), b.as_i8x16()))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_multishift_epi64_epi8&expand=4018)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm_mask_multishift_epi64_epi8(
    src: __m128i,
    k: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    let multishift: i8x16 = _mm_multishift_epi64_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, multishift, src.as_i8x16()))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_multishift_epi64_epi8&expand=4019)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm_maskz_multishift_epi64_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let multishift: i8x16 = _mm_multishift_epi64_epi8(a, b).as_i8x16();
    let zero: i8x16 = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, multishift, zero))
}

#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.avx512.vpermi2var.qi.512"]
    fn vpermi2b(a: i8x64, idx: i8x64, b: i8x64) -> i8x64;
    #[link_name = "llvm.x86.avx512.vpermi2var.qi.256"]
    fn vpermi2b256(a: i8x32, idx: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx512.vpermi2var.qi.128"]
    fn vpermi2b128(a: i8x16, idx: i8x16, b: i8x16) -> i8x16;

    #[link_name = "llvm.x86.avx512.permvar.qi.512"]
    fn vpermb(a: i8x64, idx: i8x64) -> i8x64;
    #[link_name = "llvm.x86.avx512.permvar.qi.256"]
    fn vpermb256(a: i8x32, idx: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx512.permvar.qi.128"]
    fn vpermb128(a: i8x16, idx: i8x16) -> i8x16;

    #[link_name = "llvm.x86.avx512.pmultishift.qb.512"]
    fn vpmultishiftqb(a: i8x64, b: i8x64) -> i8x64;
    #[link_name = "llvm.x86.avx512.pmultishift.qb.256"]
    fn vpmultishiftqb256(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx512.pmultishift.qb.128"]
    fn vpmultishiftqb128(a: i8x16, b: i8x16) -> i8x16;
}

#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        #[rustfmt::skip]
        let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6,
                                  9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
        let b = _mm512_set1_epi8(100);
        let r = _mm512_permutex2var_epi8(a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_mask_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        #[rustfmt::skip]
        let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6,
                                  9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
        let b = _mm512_set1_epi8(100);
        let r = _mm512_mask_permutex2var_epi8(a, 0, idx, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutex2var_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            idx,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_maskz_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        #[rustfmt::skip]
        let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6,
                                  9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
        let b = _mm512_set1_epi8(100);
        let r = _mm512_maskz_permutex2var_epi8(0, a, idx, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutex2var_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            idx,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_mask2_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        #[rustfmt::skip]
        let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6,
                                  9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
        let b = _mm512_set1_epi8(100);
        let r = _mm512_mask2_permutex2var_epi8(a, idx, 0, b);
        assert_eq_m512i(r, idx);
        let r = _mm512_mask2_permutex2var_epi8(
            a,
            idx,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                  9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm256_set1_epi8(100);
        let r = _mm256_permutex2var_epi8(a, idx, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_mask_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                  9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm256_set1_epi8(100);
        let r = _mm256_mask_permutex2var_epi8(a, 0, idx, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutex2var_epi8(a, 0b11111111_11111111_11111111_11111111, idx, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_maskz_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                  9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm256_set1_epi8(100);
        let r = _mm256_maskz_permutex2var_epi8(0, a, idx, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutex2var_epi8(0b11111111_11111111_11111111_11111111, a, idx, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_mask2_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                  9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm256_set1_epi8(100);
        let r = _mm256_mask2_permutex2var_epi8(a, idx, 0, b);
        assert_eq_m256i(r, idx);
        let r = _mm256_mask2_permutex2var_epi8(a, idx, 0b11111111_11111111_11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_permutex2var_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
        let b = _mm_set1_epi8(100);
        let r = _mm_permutex2var_epi8(a, idx, b);
        let e = _mm_set_epi8(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_mask_permutex2var_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
        let b = _mm_set1_epi8(100);
        let r = _mm_mask_permutex2var_epi8(a, 0, idx, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutex2var_epi8(a, 0b11111111_11111111, idx, b);
        let e = _mm_set_epi8(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_maskz_permutex2var_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
        let b = _mm_set1_epi8(100);
        let r = _mm_maskz_permutex2var_epi8(0, a, idx, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutex2var_epi8(0b11111111_11111111, a, idx, b);
        let e = _mm_set_epi8(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_mask2_permutex2var_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
        let b = _mm_set1_epi8(100);
        let r = _mm_mask2_permutex2var_epi8(a, idx, 0, b);
        assert_eq_m128i(r, idx);
        let r = _mm_mask2_permutex2var_epi8(a, idx, 0b11111111_11111111, b);
        let e = _mm_set_epi8(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_permutexvar_epi8() {
        let idx = _mm512_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let r = _mm512_permutexvar_epi8(idx, a);
        let e = _mm512_set1_epi8(62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_mask_permutexvar_epi8() {
        let idx = _mm512_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let r = _mm512_mask_permutexvar_epi8(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutexvar_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            idx,
            a,
        );
        let e = _mm512_set1_epi8(62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_maskz_permutexvar_epi8() {
        let idx = _mm512_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let r = _mm512_maskz_permutexvar_epi8(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutexvar_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            idx,
            a,
        );
        let e = _mm512_set1_epi8(62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_permutexvar_epi8() {
        let idx = _mm256_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm256_permutexvar_epi8(idx, a);
        let e = _mm256_set1_epi8(30);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_mask_permutexvar_epi8() {
        let idx = _mm256_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm256_mask_permutexvar_epi8(a, 0, idx, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutexvar_epi8(a, 0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm256_set1_epi8(30);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_maskz_permutexvar_epi8() {
        let idx = _mm256_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm256_maskz_permutexvar_epi8(0, idx, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutexvar_epi8(0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm256_set1_epi8(30);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_permutexvar_epi8() {
        let idx = _mm_set1_epi8(1);
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_permutexvar_epi8(idx, a);
        let e = _mm_set1_epi8(14);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_mask_permutexvar_epi8() {
        let idx = _mm_set1_epi8(1);
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_mask_permutexvar_epi8(a, 0, idx, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutexvar_epi8(a, 0b11111111_11111111, idx, a);
        let e = _mm_set1_epi8(14);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_maskz_permutexvar_epi8() {
        let idx = _mm_set1_epi8(1);
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_maskz_permutexvar_epi8(0, idx, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutexvar_epi8(0b11111111_11111111, idx, a);
        let e = _mm_set1_epi8(14);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_multishift_epi64_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_multishift_epi64_epi8(a, b);
        let e = _mm512_set1_epi8(1 << 7);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_mask_multishift_epi64_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_multishift_epi64_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_multishift_epi64_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e = _mm512_set1_epi8(1 << 7);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_maskz_multishift_epi64_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_multishift_epi64_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_multishift_epi64_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e = _mm512_set1_epi8(1 << 7);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_multishift_epi64_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_multishift_epi64_epi8(a, b);
        let e = _mm256_set1_epi8(1 << 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_mask_multishift_epi64_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_multishift_epi64_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_multishift_epi64_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm256_set1_epi8(1 << 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_maskz_multishift_epi64_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_multishift_epi64_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_multishift_epi64_epi8(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm256_set1_epi8(1 << 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_multishift_epi64_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_multishift_epi64_epi8(a, b);
        let e = _mm_set1_epi8(1 << 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_mask_multishift_epi64_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_multishift_epi64_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_multishift_epi64_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set1_epi8(1 << 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_maskz_multishift_epi64_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_multishift_epi64_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_multishift_epi64_epi8(0b11111111_11111111, a, b);
        let e = _mm_set1_epi8(1 << 7);
        assert_eq_m128i(r, e);
    }
}