use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;

#[cfg(test)]
use stdarch_test::assert_instr;

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi8&expand=4262)
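///
/// # Examples
///
/// A minimal sketch of the selection rule (not compiled here; it assumes an
/// `avx512vbmi`-capable CPU and the unstable `stdarch_x86_avx512` feature).
/// Each byte of `idx` uses only its low 7 bits: values `0..=63` select a byte
/// of `a`, values `64..=127` select a byte of `b`.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi8(10);
///     let b = _mm512_set1_epi8(20);
///     // Bit 6 set => index 64 => byte 0 of `b`, so every result byte is 20.
///     let idx = _mm512_set1_epi8(1 << 6);
///     let r = _mm512_permutex2var_epi8(a, idx, b);
/// }
/// ```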
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm512_permutex2var_epi8(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    transmute(vpermi2b(a.as_i8x64(), idx.as_i8x64(), b.as_i8x64()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi8&expand=4259)
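///
/// # Examples
///
/// A hedged sketch of the writemask behavior (not compiled here; same feature
/// assumptions as above): lanes whose mask bit is clear keep the corresponding
/// byte of `a`.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi8(10);
///     let b = _mm512_set1_epi8(20);
///     let idx = _mm512_set1_epi8(1 << 6); // select from `b`
///     // Only element 0 is written; the other 63 bytes stay at 10.
///     let r = _mm512_mask_permutex2var_epi8(a, 0b1, idx, b);
/// }
/// ```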
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermt2b))]
pub unsafe fn _mm512_mask_permutex2var_epi8(
    a: __m512i,
    k: __mmask64,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    let permute: i8x64 = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
    transmute(simd_select_bitmask(k, permute, a.as_i8x64()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi8&expand=4261)
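///
/// # Examples
///
/// A hedged sketch of the zeromask behavior (not compiled here; same feature
/// assumptions as above): lanes whose mask bit is clear are zeroed rather than
/// copied.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi8(10);
///     let b = _mm512_set1_epi8(20);
///     let idx = _mm512_set1_epi8(1 << 6);
///     // Element 0 becomes 20; elements 1..=63 become 0.
///     let r = _mm512_maskz_permutex2var_epi8(0b1, a, idx, b);
/// }
/// ```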
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm512_maskz_permutex2var_epi8(
    k: __mmask64,
    a: __m512i,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    let permute: i8x64 = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
    let zero: i8x64 = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi8&expand=4260)
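///
/// # Examples
///
/// A hedged sketch (not compiled here; same feature assumptions as above).
/// The mask2 form is the odd one out: lanes whose mask bit is clear take the
/// corresponding byte of `idx`, not of `a`.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi8(10);
///     let b = _mm512_set1_epi8(20);
///     let idx = _mm512_set1_epi8(1 << 6);
///     // With an all-zero mask the result is simply `idx`.
///     let r = _mm512_mask2_permutex2var_epi8(a, idx, 0, b);
/// }
/// ```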
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermi2b))]
pub unsafe fn _mm512_mask2_permutex2var_epi8(
    a: __m512i,
    idx: __m512i,
    k: __mmask64,
    b: __m512i,
) -> __m512i {
    let permute: i8x64 = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
    transmute(simd_select_bitmask(k, permute, idx.as_i8x64()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi8&expand=4258)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm256_permutex2var_epi8(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    transmute(vpermi2b256(a.as_i8x32(), idx.as_i8x32(), b.as_i8x32()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi8&expand=4255)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermt2b))]
pub unsafe fn _mm256_mask_permutex2var_epi8(
    a: __m256i,
    k: __mmask32,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    let permute: i8x32 = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
    transmute(simd_select_bitmask(k, permute, a.as_i8x32()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi8&expand=4257)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm256_maskz_permutex2var_epi8(
    k: __mmask32,
    a: __m256i,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    let permute: i8x32 = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
    let zero: i8x32 = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi8&expand=4256)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermi2b))]
pub unsafe fn _mm256_mask2_permutex2var_epi8(
    a: __m256i,
    idx: __m256i,
    k: __mmask32,
    b: __m256i,
) -> __m256i {
    let permute: i8x32 = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
    transmute(simd_select_bitmask(k, permute, idx.as_i8x32()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi8&expand=4254)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm_permutex2var_epi8(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    transmute(vpermi2b128(a.as_i8x16(), idx.as_i8x16(), b.as_i8x16()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi8&expand=4251)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermt2b))]
pub unsafe fn _mm_mask_permutex2var_epi8(
    a: __m128i,
    k: __mmask16,
    idx: __m128i,
    b: __m128i,
) -> __m128i {
    let permute: i8x16 = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
    transmute(simd_select_bitmask(k, permute, a.as_i8x16()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi8&expand=4253)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm_maskz_permutex2var_epi8(
    k: __mmask16,
    a: __m128i,
    idx: __m128i,
    b: __m128i,
) -> __m128i {
    let permute: i8x16 = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
    let zero: i8x16 = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi8&expand=4252)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermi2b))]
pub unsafe fn _mm_mask2_permutex2var_epi8(
    a: __m128i,
    idx: __m128i,
    k: __mmask16,
    b: __m128i,
) -> __m128i {
    let permute: i8x16 = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
    transmute(simd_select_bitmask(k, permute, idx.as_i8x16()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi8&expand=4316)
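///
/// # Examples
///
/// A hedged sketch (not compiled here; same feature assumptions as above).
/// For this 64-lane form only the low 6 bits of each index byte are used, so
/// indices wrap modulo 64.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi8(7);
///     // Index 64 wraps to 0, so every result byte selects element 0 of `a`.
///     let idx = _mm512_set1_epi8(64);
///     let r = _mm512_permutexvar_epi8(idx, a);
/// }
/// ```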
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm512_permutexvar_epi8(idx: __m512i, a: __m512i) -> __m512i {
    transmute(vpermb(a.as_i8x64(), idx.as_i8x64()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi8&expand=4314)
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm512_mask_permutexvar_epi8(
    src: __m512i,
    k: __mmask64,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    let permute: i8x64 = _mm512_permutexvar_epi8(idx, a).as_i8x64();
    transmute(simd_select_bitmask(k, permute, src.as_i8x64()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi8&expand=4315)
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm512_maskz_permutexvar_epi8(k: __mmask64, idx: __m512i, a: __m512i) -> __m512i {
    let permute: i8x64 = _mm512_permutexvar_epi8(idx, a).as_i8x64();
    let zero: i8x64 = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi8&expand=4313)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm256_permutexvar_epi8(idx: __m256i, a: __m256i) -> __m256i {
    transmute(vpermb256(a.as_i8x32(), idx.as_i8x32()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi8&expand=4311)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm256_mask_permutexvar_epi8(
    src: __m256i,
    k: __mmask32,
    idx: __m256i,
    a: __m256i,
) -> __m256i {
    let permute: i8x32 = _mm256_permutexvar_epi8(idx, a).as_i8x32();
    transmute(simd_select_bitmask(k, permute, src.as_i8x32()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi8&expand=4312)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm256_maskz_permutexvar_epi8(k: __mmask32, idx: __m256i, a: __m256i) -> __m256i {
    let permute: i8x32 = _mm256_permutexvar_epi8(idx, a).as_i8x32();
    let zero: i8x32 = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi8&expand=4310)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm_permutexvar_epi8(idx: __m128i, a: __m128i) -> __m128i {
    transmute(vpermb128(a.as_i8x16(), idx.as_i8x16()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi8&expand=4308)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm_mask_permutexvar_epi8(
    src: __m128i,
    k: __mmask16,
    idx: __m128i,
    a: __m128i,
) -> __m128i {
    let permute: i8x16 = _mm_permutexvar_epi8(idx, a).as_i8x16();
    transmute(simd_select_bitmask(k, permute, src.as_i8x16()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi8&expand=4309)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm_maskz_permutexvar_epi8(k: __mmask16, idx: __m128i, a: __m128i) -> __m128i {
    let permute: i8x16 = _mm_permutexvar_epi8(idx, a).as_i8x16();
    let zero: i8x16 = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_multishift_epi64_epi8&expand=4026)
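///
/// # Examples
///
/// A hedged sketch (not compiled here; same feature assumptions as above).
/// Byte i of each qword of `a` holds a bit offset; result byte i is the 8-bit
/// field of the matching qword of `b` starting at that offset (taken modulo
/// 64, with wraparound), i.e. the low byte of a 64-bit rotate right.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let ctrl = _mm512_set1_epi8(8); // every byte: start at bit offset 8
///     let data = _mm512_set1_epi64(0x0123_4567_89AB_CDEF);
///     // Bits 15..8 of each source qword are 0xCD, so every result byte is 0xCD.
///     let r = _mm512_multishift_epi64_epi8(ctrl, data);
/// }
/// ```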
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm512_multishift_epi64_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmultishiftqb(a.as_i8x64(), b.as_i8x64()))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_multishift_epi64_epi8&expand=4024)
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm512_mask_multishift_epi64_epi8(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let multishift: i8x64 = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, multishift, src.as_i8x64()))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_multishift_epi64_epi8&expand=4025)
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm512_maskz_multishift_epi64_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let multishift: i8x64 = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
    let zero: i8x64 = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, multishift, zero))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_multishift_epi64_epi8&expand=4023)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm256_multishift_epi64_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(vpmultishiftqb256(a.as_i8x32(), b.as_i8x32()))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_multishift_epi64_epi8&expand=4021)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm256_mask_multishift_epi64_epi8(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let multishift: i8x32 = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, multishift, src.as_i8x32()))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_multishift_epi64_epi8&expand=4022)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm256_maskz_multishift_epi64_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let multishift: i8x32 = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
    let zero: i8x32 = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, multishift, zero))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_multishift_epi64_epi8&expand=4020)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm_multishift_epi64_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(vpmultishiftqb128(a.as_i8x16(), b.as_i8x16()))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_multishift_epi64_epi8&expand=4018)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm_mask_multishift_epi64_epi8(
    src: __m128i,
    k: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    let multishift: i8x16 = _mm_multishift_epi64_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, multishift, src.as_i8x16()))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_multishift_epi64_epi8&expand=4019)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm_maskz_multishift_epi64_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let multishift: i8x16 = _mm_multishift_epi64_epi8(a, b).as_i8x16();
    let zero: i8x16 = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, multishift, zero))
}

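// LLVM intrinsic bindings backing the functions above; each link name must
// match the symbol LLVM exposes for the corresponding machine instruction.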
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.avx512.vpermi2var.qi.512"]
    fn vpermi2b(a: i8x64, idx: i8x64, b: i8x64) -> i8x64;
    #[link_name = "llvm.x86.avx512.vpermi2var.qi.256"]
    fn vpermi2b256(a: i8x32, idx: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx512.vpermi2var.qi.128"]
    fn vpermi2b128(a: i8x16, idx: i8x16, b: i8x16) -> i8x16;

    #[link_name = "llvm.x86.avx512.permvar.qi.512"]
    fn vpermb(a: i8x64, idx: i8x64) -> i8x64;
    #[link_name = "llvm.x86.avx512.permvar.qi.256"]
    fn vpermb256(a: i8x32, idx: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx512.permvar.qi.128"]
    fn vpermb128(a: i8x16, idx: i8x16) -> i8x16;

    #[link_name = "llvm.x86.avx512.pmultishift.qb.512"]
    fn vpmultishiftqb(a: i8x64, b: i8x64) -> i8x64;
    #[link_name = "llvm.x86.avx512.pmultishift.qb.256"]
    fn vpmultishiftqb256(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx512.pmultishift.qb.128"]
    fn vpmultishiftqb128(a: i8x16, b: i8x16) -> i8x16;
}

#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        #[rustfmt::skip]
        let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6,
                                  9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
        let b = _mm512_set1_epi8(100);
        let r = _mm512_permutex2var_epi8(a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_mask_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        #[rustfmt::skip]
        let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6,
                                  9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
        let b = _mm512_set1_epi8(100);
        let r = _mm512_mask_permutex2var_epi8(a, 0, idx, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutex2var_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            idx,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_maskz_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        #[rustfmt::skip]
        let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6,
                                  9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
        let b = _mm512_set1_epi8(100);
        let r = _mm512_maskz_permutex2var_epi8(0, a, idx, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutex2var_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            idx,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_mask2_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        #[rustfmt::skip]
        let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6,
                                  9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
        let b = _mm512_set1_epi8(100);
        let r = _mm512_mask2_permutex2var_epi8(a, idx, 0, b);
        assert_eq_m512i(r, idx);
        let r = _mm512_mask2_permutex2var_epi8(
            a,
            idx,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                  9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm256_set1_epi8(100);
        let r = _mm256_permutex2var_epi8(a, idx, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_mask_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                  9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm256_set1_epi8(100);
        let r = _mm256_mask_permutex2var_epi8(a, 0, idx, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutex2var_epi8(a, 0b11111111_11111111_11111111_11111111, idx, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_maskz_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                  9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm256_set1_epi8(100);
        let r = _mm256_maskz_permutex2var_epi8(0, a, idx, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutex2var_epi8(0b11111111_11111111_11111111_11111111, a, idx, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_mask2_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                  9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm256_set1_epi8(100);
        let r = _mm256_mask2_permutex2var_epi8(a, idx, 0, b);
        assert_eq_m256i(r, idx);
        let r = _mm256_mask2_permutex2var_epi8(a, idx, 0b11111111_11111111_11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_permutex2var_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
        let b = _mm_set1_epi8(100);
        let r = _mm_permutex2var_epi8(a, idx, b);
        let e = _mm_set_epi8(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_mask_permutex2var_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
        let b = _mm_set1_epi8(100);
        let r = _mm_mask_permutex2var_epi8(a, 0, idx, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutex2var_epi8(a, 0b11111111_11111111, idx, b);
        let e = _mm_set_epi8(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_maskz_permutex2var_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
        let b = _mm_set1_epi8(100);
        let r = _mm_maskz_permutex2var_epi8(0, a, idx, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutex2var_epi8(0b11111111_11111111, a, idx, b);
        let e = _mm_set_epi8(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_mask2_permutex2var_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
        let b = _mm_set1_epi8(100);
        let r = _mm_mask2_permutex2var_epi8(a, idx, 0, b);
        assert_eq_m128i(r, idx);
        let r = _mm_mask2_permutex2var_epi8(a, idx, 0b11111111_11111111, b);
        let e = _mm_set_epi8(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_permutexvar_epi8() {
        let idx = _mm512_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let r = _mm512_permutexvar_epi8(idx, a);
        let e = _mm512_set1_epi8(62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_mask_permutexvar_epi8() {
        let idx = _mm512_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let r = _mm512_mask_permutexvar_epi8(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutexvar_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            idx,
            a,
        );
        let e = _mm512_set1_epi8(62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_maskz_permutexvar_epi8() {
        let idx = _mm512_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let r = _mm512_maskz_permutexvar_epi8(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutexvar_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            idx,
            a,
        );
        let e = _mm512_set1_epi8(62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_permutexvar_epi8() {
        let idx = _mm256_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm256_permutexvar_epi8(idx, a);
        let e = _mm256_set1_epi8(30);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_mask_permutexvar_epi8() {
        let idx = _mm256_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm256_mask_permutexvar_epi8(a, 0, idx, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutexvar_epi8(a, 0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm256_set1_epi8(30);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_maskz_permutexvar_epi8() {
        let idx = _mm256_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm256_maskz_permutexvar_epi8(0, idx, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutexvar_epi8(0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm256_set1_epi8(30);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_permutexvar_epi8() {
        let idx = _mm_set1_epi8(1);
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_permutexvar_epi8(idx, a);
        let e = _mm_set1_epi8(14);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_mask_permutexvar_epi8() {
        let idx = _mm_set1_epi8(1);
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_mask_permutexvar_epi8(a, 0, idx, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutexvar_epi8(a, 0b11111111_11111111, idx, a);
        let e = _mm_set1_epi8(14);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_maskz_permutexvar_epi8() {
        let idx = _mm_set1_epi8(1);
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_maskz_permutexvar_epi8(0, idx, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutexvar_epi8(0b11111111_11111111, idx, a);
        let e = _mm_set1_epi8(14);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_multishift_epi64_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_multishift_epi64_epi8(a, b);
        let e = _mm512_set1_epi8(1 << 7);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_mask_multishift_epi64_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_multishift_epi64_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_multishift_epi64_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e = _mm512_set1_epi8(1 << 7);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_maskz_multishift_epi64_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_multishift_epi64_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_multishift_epi64_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e = _mm512_set1_epi8(1 << 7);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_multishift_epi64_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_multishift_epi64_epi8(a, b);
        let e = _mm256_set1_epi8(1 << 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_mask_multishift_epi64_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_multishift_epi64_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_multishift_epi64_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm256_set1_epi8(1 << 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_maskz_multishift_epi64_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_multishift_epi64_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_multishift_epi64_epi8(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm256_set1_epi8(1 << 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_multishift_epi64_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_multishift_epi64_epi8(a, b);
        let e = _mm_set1_epi8(1 << 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_mask_multishift_epi64_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_multishift_epi64_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_multishift_epi64_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set1_epi8(1 << 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_maskz_multishift_epi64_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_multishift_epi64_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_multishift_epi64_epi8(0b11111111_11111111, a, b);
        let e = _mm_set1_epi8(1 << 7);
        assert_eq_m128i(r, e);
    }
}