1use crate::core_arch::{simd::*, simd_llvm::*, x86::*};
2
3#[cfg(test)]
4use stdarch_test::assert_instr;
5
6/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
7///
8/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi8&expand=4262)
9#[inline]
10#[target_feature(enable = "avx512vbmi")]
11#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
12pub unsafe fn _mm512_permutex2var_epi8(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
13 transmute(src:vpermi2b(a:a.as_i8x64(), idx:idx.as_i8x64(), b:b.as_i8x64()))
14}
15
16/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
17///
18/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi8&expand=4259)
19#[inline]
20#[target_feature(enable = "avx512vbmi")]
21#[cfg_attr(test, assert_instr(vpermt2b))]
22pub unsafe fn _mm512_mask_permutex2var_epi8(
23 a: __m512i,
24 k: __mmask64,
25 idx: __m512i,
26 b: __m512i,
27) -> __m512i {
28 let permute: i8x64 = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
29 transmute(src:simd_select_bitmask(m:k, a:permute, b:a.as_i8x64()))
30}
31
32/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
33///
34/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi8&expand=4261)
35#[inline]
36#[target_feature(enable = "avx512vbmi")]
37#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
38pub unsafe fn _mm512_maskz_permutex2var_epi8(
39 k: __mmask64,
40 a: __m512i,
41 idx: __m512i,
42 b: __m512i,
43) -> __m512i {
44 let permute: i8x64 = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
45 let zero: i8x64 = _mm512_setzero_si512().as_i8x64();
46 transmute(src:simd_select_bitmask(m:k, a:permute, b:zero))
47}
48
49/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
50///
51/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi8&expand=4260)
52#[inline]
53#[target_feature(enable = "avx512vbmi")]
54#[cfg_attr(test, assert_instr(vpermi2b))]
55pub unsafe fn _mm512_mask2_permutex2var_epi8(
56 a: __m512i,
57 idx: __m512i,
58 k: __mmask64,
59 b: __m512i,
60) -> __m512i {
61 let permute: i8x64 = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
62 transmute(src:simd_select_bitmask(m:k, a:permute, b:idx.as_i8x64()))
63}
64
65/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
66///
67/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi8&expand=4258)
68#[inline]
69#[target_feature(enable = "avx512vbmi,avx512vl")]
70#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
71pub unsafe fn _mm256_permutex2var_epi8(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
72 transmute(src:vpermi2b256(a:a.as_i8x32(), idx:idx.as_i8x32(), b:b.as_i8x32()))
73}
74
75/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
76///
77/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi8&expand=4255)
78#[inline]
79#[target_feature(enable = "avx512vbmi,avx512vl")]
80#[cfg_attr(test, assert_instr(vpermt2b))]
81pub unsafe fn _mm256_mask_permutex2var_epi8(
82 a: __m256i,
83 k: __mmask32,
84 idx: __m256i,
85 b: __m256i,
86) -> __m256i {
87 let permute: i8x32 = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
88 transmute(src:simd_select_bitmask(m:k, a:permute, b:a.as_i8x32()))
89}
90
91/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
92///
93/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi8&expand=4257)
94#[inline]
95#[target_feature(enable = "avx512vbmi,avx512vl")]
96#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
97pub unsafe fn _mm256_maskz_permutex2var_epi8(
98 k: __mmask32,
99 a: __m256i,
100 idx: __m256i,
101 b: __m256i,
102) -> __m256i {
103 let permute: i8x32 = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
104 let zero: i8x32 = _mm256_setzero_si256().as_i8x32();
105 transmute(src:simd_select_bitmask(m:k, a:permute, b:zero))
106}
107
108/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
109///
110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi8&expand=4256)
111#[inline]
112#[target_feature(enable = "avx512vbmi,avx512vl")]
113#[cfg_attr(test, assert_instr(vpermi2b))]
114pub unsafe fn _mm256_mask2_permutex2var_epi8(
115 a: __m256i,
116 idx: __m256i,
117 k: __mmask32,
118 b: __m256i,
119) -> __m256i {
120 let permute: i8x32 = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
121 transmute(src:simd_select_bitmask(m:k, a:permute, b:idx.as_i8x32()))
122}
123
124/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
125///
126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi8&expand=4254)
127#[inline]
128#[target_feature(enable = "avx512vbmi,avx512vl")]
129#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
130pub unsafe fn _mm_permutex2var_epi8(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
131 transmute(src:vpermi2b128(a:a.as_i8x16(), idx:idx.as_i8x16(), b:b.as_i8x16()))
132}
133
134/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
135///
136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi8&expand=4251)
137#[inline]
138#[target_feature(enable = "avx512vbmi,avx512vl")]
139#[cfg_attr(test, assert_instr(vpermt2b))]
140pub unsafe fn _mm_mask_permutex2var_epi8(
141 a: __m128i,
142 k: __mmask16,
143 idx: __m128i,
144 b: __m128i,
145) -> __m128i {
146 let permute: i8x16 = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
147 transmute(src:simd_select_bitmask(m:k, a:permute, b:a.as_i8x16()))
148}
149
150/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
151///
152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi8&expand=4253)
153#[inline]
154#[target_feature(enable = "avx512vbmi,avx512vl")]
155#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
156pub unsafe fn _mm_maskz_permutex2var_epi8(
157 k: __mmask16,
158 a: __m128i,
159 idx: __m128i,
160 b: __m128i,
161) -> __m128i {
162 let permute: i8x16 = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
163 let zero: i8x16 = _mm_setzero_si128().as_i8x16();
164 transmute(src:simd_select_bitmask(m:k, a:permute, b:zero))
165}
166
167/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
168///
169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi8&expand=4252)
170#[inline]
171#[target_feature(enable = "avx512vbmi,avx512vl")]
172#[cfg_attr(test, assert_instr(vpermi2b))]
173pub unsafe fn _mm_mask2_permutex2var_epi8(
174 a: __m128i,
175 idx: __m128i,
176 k: __mmask16,
177 b: __m128i,
178) -> __m128i {
179 let permute: i8x16 = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
180 transmute(src:simd_select_bitmask(m:k, a:permute, b:idx.as_i8x16()))
181}
182
183/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
184///
185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi8&expand=4316)
186#[inline]
187#[target_feature(enable = "avx512vbmi")]
188#[cfg_attr(test, assert_instr(vpermb))]
189pub unsafe fn _mm512_permutexvar_epi8(idx: __m512i, a: __m512i) -> __m512i {
190 transmute(src:vpermb(a:a.as_i8x64(), idx:idx.as_i8x64()))
191}
192
193/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
194///
195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi8&expand=4314)
196#[inline]
197#[target_feature(enable = "avx512vbmi")]
198#[cfg_attr(test, assert_instr(vpermb))]
199pub unsafe fn _mm512_mask_permutexvar_epi8(
200 src: __m512i,
201 k: __mmask64,
202 idx: __m512i,
203 a: __m512i,
204) -> __m512i {
205 let permute: i8x64 = _mm512_permutexvar_epi8(idx, a).as_i8x64();
206 transmute(src:simd_select_bitmask(m:k, a:permute, b:src.as_i8x64()))
207}
208
209/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
210///
211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi8&expand=4315)
212#[inline]
213#[target_feature(enable = "avx512vbmi")]
214#[cfg_attr(test, assert_instr(vpermb))]
215pub unsafe fn _mm512_maskz_permutexvar_epi8(k: __mmask64, idx: __m512i, a: __m512i) -> __m512i {
216 let permute: i8x64 = _mm512_permutexvar_epi8(idx, a).as_i8x64();
217 let zero: i8x64 = _mm512_setzero_si512().as_i8x64();
218 transmute(src:simd_select_bitmask(m:k, a:permute, b:zero))
219}
220
221/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
222///
223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi8&expand=4313)
224#[inline]
225#[target_feature(enable = "avx512vbmi,avx512vl")]
226#[cfg_attr(test, assert_instr(vpermb))]
227pub unsafe fn _mm256_permutexvar_epi8(idx: __m256i, a: __m256i) -> __m256i {
228 transmute(src:vpermb256(a:a.as_i8x32(), idx:idx.as_i8x32()))
229}
230
231/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
232///
233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi8&expand=4311)
234#[inline]
235#[target_feature(enable = "avx512vbmi,avx512vl")]
236#[cfg_attr(test, assert_instr(vpermb))]
237pub unsafe fn _mm256_mask_permutexvar_epi8(
238 src: __m256i,
239 k: __mmask32,
240 idx: __m256i,
241 a: __m256i,
242) -> __m256i {
243 let permute: i8x32 = _mm256_permutexvar_epi8(idx, a).as_i8x32();
244 transmute(src:simd_select_bitmask(m:k, a:permute, b:src.as_i8x32()))
245}
246
247/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
248///
249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi8&expand=4312)
250#[inline]
251#[target_feature(enable = "avx512vbmi,avx512vl")]
252#[cfg_attr(test, assert_instr(vpermb))]
253pub unsafe fn _mm256_maskz_permutexvar_epi8(k: __mmask32, idx: __m256i, a: __m256i) -> __m256i {
254 let permute: i8x32 = _mm256_permutexvar_epi8(idx, a).as_i8x32();
255 let zero: i8x32 = _mm256_setzero_si256().as_i8x32();
256 transmute(src:simd_select_bitmask(m:k, a:permute, b:zero))
257}
258
259/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
260///
261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi8&expand=4310)
262#[inline]
263#[target_feature(enable = "avx512vbmi,avx512vl")]
264#[cfg_attr(test, assert_instr(vpermb))]
265pub unsafe fn _mm_permutexvar_epi8(idx: __m128i, a: __m128i) -> __m128i {
266 transmute(src:vpermb128(a:a.as_i8x16(), idx:idx.as_i8x16()))
267}
268
269/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
270///
271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi8&expand=4308)
272#[inline]
273#[target_feature(enable = "avx512vbmi,avx512vl")]
274#[cfg_attr(test, assert_instr(vpermb))]
275pub unsafe fn _mm_mask_permutexvar_epi8(
276 src: __m128i,
277 k: __mmask16,
278 idx: __m128i,
279 a: __m128i,
280) -> __m128i {
281 let permute: i8x16 = _mm_permutexvar_epi8(idx, a).as_i8x16();
282 transmute(src:simd_select_bitmask(m:k, a:permute, b:src.as_i8x16()))
283}
284
285/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
286///
287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi8&expand=4309)
288#[inline]
289#[target_feature(enable = "avx512vbmi,avx512vl")]
290#[cfg_attr(test, assert_instr(vpermb))]
291pub unsafe fn _mm_maskz_permutexvar_epi8(k: __mmask16, idx: __m128i, a: __m128i) -> __m128i {
292 let permute: i8x16 = _mm_permutexvar_epi8(idx, a).as_i8x16();
293 let zero: i8x16 = _mm_setzero_si128().as_i8x16();
294 transmute(src:simd_select_bitmask(m:k, a:permute, b:zero))
295}
296
297/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
298///
299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_multishift_epi64_epi8&expand=4026)
300#[inline]
301#[target_feature(enable = "avx512vbmi")]
302#[cfg_attr(test, assert_instr(vpmultishiftqb))]
303pub unsafe fn _mm512_multishift_epi64_epi8(a: __m512i, b: __m512i) -> __m512i {
304 transmute(src:vpmultishiftqb(a:a.as_i8x64(), b:b.as_i8x64()))
305}
306
307/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
308///
309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_multishift_epi64_epi8&expand=4024)
310#[inline]
311#[target_feature(enable = "avx512vbmi")]
312#[cfg_attr(test, assert_instr(vpmultishiftqb))]
313pub unsafe fn _mm512_mask_multishift_epi64_epi8(
314 src: __m512i,
315 k: __mmask64,
316 a: __m512i,
317 b: __m512i,
318) -> __m512i {
319 let multishift: i8x64 = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
320 transmute(src:simd_select_bitmask(m:k, a:multishift, b:src.as_i8x64()))
321}
322
323/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
324///
325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_multishift_epi64_epi8&expand=4025)
326#[inline]
327#[target_feature(enable = "avx512vbmi")]
328#[cfg_attr(test, assert_instr(vpmultishiftqb))]
329pub unsafe fn _mm512_maskz_multishift_epi64_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
330 let multishift: i8x64 = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
331 let zero: i8x64 = _mm512_setzero_si512().as_i8x64();
332 transmute(src:simd_select_bitmask(m:k, a:multishift, b:zero))
333}
334
335/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
336///
337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_multishift_epi64_epi8&expand=4023)
338#[inline]
339#[target_feature(enable = "avx512vbmi,avx512vl")]
340#[cfg_attr(test, assert_instr(vpmultishiftqb))]
341pub unsafe fn _mm256_multishift_epi64_epi8(a: __m256i, b: __m256i) -> __m256i {
342 transmute(src:vpmultishiftqb256(a:a.as_i8x32(), b:b.as_i8x32()))
343}
344
345/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
346///
347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_multishift_epi64_epi8&expand=4021)
348#[inline]
349#[target_feature(enable = "avx512vbmi,avx512vl")]
350#[cfg_attr(test, assert_instr(vpmultishiftqb))]
351pub unsafe fn _mm256_mask_multishift_epi64_epi8(
352 src: __m256i,
353 k: __mmask32,
354 a: __m256i,
355 b: __m256i,
356) -> __m256i {
357 let multishift: i8x32 = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
358 transmute(src:simd_select_bitmask(m:k, a:multishift, b:src.as_i8x32()))
359}
360
361/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
362///
363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_multishift_epi64_epi8&expand=4022)
364#[inline]
365#[target_feature(enable = "avx512vbmi,avx512vl")]
366#[cfg_attr(test, assert_instr(vpmultishiftqb))]
367pub unsafe fn _mm256_maskz_multishift_epi64_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
368 let multishift: i8x32 = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
369 let zero: i8x32 = _mm256_setzero_si256().as_i8x32();
370 transmute(src:simd_select_bitmask(m:k, a:multishift, b:zero))
371}
372
373/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
374///
375/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm_multishift_epi64_epi8&expand=4020)
376#[inline]
377#[target_feature(enable = "avx512vbmi,avx512vl")]
378#[cfg_attr(test, assert_instr(vpmultishiftqb))]
379pub unsafe fn _mm_multishift_epi64_epi8(a: __m128i, b: __m128i) -> __m128i {
380 transmute(src:vpmultishiftqb128(a:a.as_i8x16(), b:b.as_i8x16()))
381}
382
383/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
384///
385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_multishift_epi64_epi8&expand=4018)
386#[inline]
387#[target_feature(enable = "avx512vbmi,avx512vl")]
388#[cfg_attr(test, assert_instr(vpmultishiftqb))]
389pub unsafe fn _mm_mask_multishift_epi64_epi8(
390 src: __m128i,
391 k: __mmask16,
392 a: __m128i,
393 b: __m128i,
394) -> __m128i {
395 let multishift: i8x16 = _mm_multishift_epi64_epi8(a, b).as_i8x16();
396 transmute(src:simd_select_bitmask(m:k, a:multishift, b:src.as_i8x16()))
397}
398
399/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
400///
401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_multishift_epi64_epi8&expand=4019)
402#[inline]
403#[target_feature(enable = "avx512vbmi,avx512vl")]
404#[cfg_attr(test, assert_instr(vpmultishiftqb))]
405pub unsafe fn _mm_maskz_multishift_epi64_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
406 let multishift: i8x16 = _mm_multishift_epi64_epi8(a, b).as_i8x16();
407 let zero: i8x16 = _mm_setzero_si128().as_i8x16();
408 transmute(src:simd_select_bitmask(m:k, a:multishift, b:zero))
409}
410
// Raw LLVM intrinsic bindings backing the safe-ish wrappers above. The link
// names must match LLVM's x86 AVX-512 intrinsic identifiers exactly.
#[allow(improper_ctypes)]
extern "C" {
    // Two-source byte permute (VPERMI2B): 512/256/128-bit variants.
    #[link_name = "llvm.x86.avx512.vpermi2var.qi.512"]
    fn vpermi2b(a: i8x64, idx: i8x64, b: i8x64) -> i8x64;
    #[link_name = "llvm.x86.avx512.vpermi2var.qi.256"]
    fn vpermi2b256(a: i8x32, idx: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx512.vpermi2var.qi.128"]
    fn vpermi2b128(a: i8x16, idx: i8x16, b: i8x16) -> i8x16;

    // Single-source byte permute (VPERMB): 512/256/128-bit variants.
    #[link_name = "llvm.x86.avx512.permvar.qi.512"]
    fn vpermb(a: i8x64, idx: i8x64) -> i8x64;
    #[link_name = "llvm.x86.avx512.permvar.qi.256"]
    fn vpermb256(a: i8x32, idx: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx512.permvar.qi.128"]
    fn vpermb128(a: i8x16, idx: i8x16) -> i8x16;

    // Per-qword byte-granular multishift (VPMULTISHIFTQB): 512/256/128-bit variants.
    #[link_name = "llvm.x86.avx512.pmultishift.qb.512"]
    fn vpmultishiftqb(a: i8x64, b: i8x64) -> i8x64;
    #[link_name = "llvm.x86.avx512.pmultishift.qb.256"]
    fn vpmultishiftqb256(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx512.pmultishift.qb.128"]
    fn vpmultishiftqb128(a: i8x16, b: i8x16) -> i8x16;
}
434
435#[cfg(test)]
436mod tests {
437
438 use stdarch_test::simd_test;
439
440 use crate::core_arch::x86::*;
441
    // Unmasked 512-bit two-source permute: where the index byte has bit 6 set,
    // the result comes from `b` (all 100); elsewhere it selects a byte of `a`.
    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        #[rustfmt::skip]
        let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6,
                                  9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
        let b = _mm512_set1_epi8(100);
        let r = _mm512_permutex2var_epi8(a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
        );
        assert_eq_m512i(r, e);
    }
465
    // Writemask variant: with k == 0 the result must equal `a`; with all mask
    // bits set it must match the unmasked permute result.
    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_mask_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        #[rustfmt::skip]
        let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6,
                                  9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
        let b = _mm512_set1_epi8(100);
        let r = _mm512_mask_permutex2var_epi8(a, 0, idx, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutex2var_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            idx,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
        );
        assert_eq_m512i(r, e);
    }
496
    // Zeromask variant: with k == 0 the result must be all zeros; with all mask
    // bits set it must match the unmasked permute result.
    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_maskz_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        #[rustfmt::skip]
        let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6,
                                  9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
        let b = _mm512_set1_epi8(100);
        let r = _mm512_maskz_permutex2var_epi8(0, a, idx, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutex2var_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            idx,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
        );
        assert_eq_m512i(r, e);
    }
527
    // mask2 variant: with k == 0 the result must equal `idx` (the fallback
    // operand for this form); with all bits set it matches the unmasked permute.
    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_mask2_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        #[rustfmt::skip]
        let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6,
                                  9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
        let b = _mm512_set1_epi8(100);
        let r = _mm512_mask2_permutex2var_epi8(a, idx, 0, b);
        assert_eq_m512i(r, idx);
        let r = _mm512_mask2_permutex2var_epi8(
            a,
            idx,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
        );
        assert_eq_m512i(r, e);
    }
558
    // Unmasked 256-bit two-source permute: index bytes with bit 5 set select
    // from `b` (all 100); others select a byte of `a`.
    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                  9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm256_set1_epi8(100);
        let r = _mm256_permutex2var_epi8(a, idx, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m256i(r, e);
    }
576
    // Writemask variant (256-bit): k == 0 returns `a`; all-ones mask matches
    // the unmasked result.
    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_mask_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                  9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm256_set1_epi8(100);
        let r = _mm256_mask_permutex2var_epi8(a, 0, idx, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutex2var_epi8(a, 0b11111111_11111111_11111111_11111111, idx, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m256i(r, e);
    }
596
    // Zeromask variant (256-bit): k == 0 returns all zeros; all-ones mask
    // matches the unmasked result.
    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_maskz_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                  9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm256_set1_epi8(100);
        let r = _mm256_maskz_permutex2var_epi8(0, a, idx, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutex2var_epi8(0b11111111_11111111_11111111_11111111, a, idx, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m256i(r, e);
    }
616
    // mask2 variant (256-bit): k == 0 returns `idx`; all-ones mask matches the
    // unmasked result.
    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_mask2_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                  9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm256_set1_epi8(100);
        let r = _mm256_mask2_permutex2var_epi8(a, idx, 0, b);
        assert_eq_m256i(r, idx);
        let r = _mm256_mask2_permutex2var_epi8(a, idx, 0b11111111_11111111_11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m256i(r, e);
    }
636
637 #[simd_test(enable = "avx512vbmi,avx512vl")]
638 unsafe fn test_mm_permutex2var_epi8() {
639 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
640 #[rustfmt::skip]
641 let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
642 let b = _mm_set1_epi8(100);
643 let r = _mm_permutex2var_epi8(a, idx, b);
644 let e = _mm_set_epi8(
645 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
646 );
647 assert_eq_m128i(r, e);
648 }
649
650 #[simd_test(enable = "avx512vbmi,avx512vl")]
651 unsafe fn test_mm_mask_permutex2var_epi8() {
652 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
653 #[rustfmt::skip]
654 let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
655 let b = _mm_set1_epi8(100);
656 let r = _mm_mask_permutex2var_epi8(a, 0, idx, b);
657 assert_eq_m128i(r, a);
658 let r = _mm_mask_permutex2var_epi8(a, 0b11111111_11111111, idx, b);
659 let e = _mm_set_epi8(
660 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
661 );
662 assert_eq_m128i(r, e);
663 }
664
665 #[simd_test(enable = "avx512vbmi,avx512vl")]
666 unsafe fn test_mm_maskz_permutex2var_epi8() {
667 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
668 #[rustfmt::skip]
669 let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
670 let b = _mm_set1_epi8(100);
671 let r = _mm_maskz_permutex2var_epi8(0, a, idx, b);
672 assert_eq_m128i(r, _mm_setzero_si128());
673 let r = _mm_maskz_permutex2var_epi8(0b11111111_11111111, a, idx, b);
674 let e = _mm_set_epi8(
675 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
676 );
677 assert_eq_m128i(r, e);
678 }
679
680 #[simd_test(enable = "avx512vbmi,avx512vl")]
681 unsafe fn test_mm_mask2_permutex2var_epi8() {
682 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
683 #[rustfmt::skip]
684 let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
685 let b = _mm_set1_epi8(100);
686 let r = _mm_mask2_permutex2var_epi8(a, idx, 0, b);
687 assert_eq_m128i(r, idx);
688 let r = _mm_mask2_permutex2var_epi8(a, idx, 0b11111111_11111111, b);
689 let e = _mm_set_epi8(
690 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
691 );
692 assert_eq_m128i(r, e);
693 }
694
    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_permutexvar_epi8() {
        // Every index is 1, so every output lane should receive element 1 of
        // `a`, which is 62 (set_epi8 lists the highest element first).
        let idx = _mm512_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let r = _mm512_permutexvar_epi8(idx, a);
        let e = _mm512_set1_epi8(62);
        assert_eq_m512i(r, e);
    }
707
    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_mask_permutexvar_epi8() {
        // Every index is 1; element 1 of `a` is 62 (set_epi8 lists the
        // highest element first).
        let idx = _mm512_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        // A zero writemask passes the src operand (`a`) through unchanged.
        let r = _mm512_mask_permutexvar_epi8(a, 0, idx, a);
        assert_eq_m512i(r, a);
        // An all-ones writemask keeps the full permute result.
        let r = _mm512_mask_permutexvar_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            idx,
            a,
        );
        let e = _mm512_set1_epi8(62);
        assert_eq_m512i(r, e);
    }
727
    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_maskz_permutexvar_epi8() {
        // Every index is 1; element 1 of `a` is 62 (set_epi8 lists the
        // highest element first).
        let idx = _mm512_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        // A zero mask must zero every lane.
        let r = _mm512_maskz_permutexvar_epi8(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // An all-ones mask keeps the full permute result.
        let r = _mm512_maskz_permutexvar_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            idx,
            a,
        );
        let e = _mm512_set1_epi8(62);
        assert_eq_m512i(r, e);
    }
746
    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_permutexvar_epi8() {
        // Every index is 1, so every output lane should receive element 1 of
        // `a`, which is 30 (set_epi8 lists the highest element first).
        let idx = _mm256_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm256_permutexvar_epi8(idx, a);
        let e = _mm256_set1_epi8(30);
        assert_eq_m256i(r, e);
    }
757
    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_mask_permutexvar_epi8() {
        // Every index is 1; element 1 of `a` is 30 (set_epi8 lists the
        // highest element first).
        let idx = _mm256_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // A zero writemask passes the src operand (`a`) through unchanged.
        let r = _mm256_mask_permutexvar_epi8(a, 0, idx, a);
        assert_eq_m256i(r, a);
        // An all-ones writemask keeps the full permute result.
        let r = _mm256_mask_permutexvar_epi8(a, 0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm256_set1_epi8(30);
        assert_eq_m256i(r, e);
    }
770
    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_maskz_permutexvar_epi8() {
        // Every index is 1; element 1 of `a` is 30 (set_epi8 lists the
        // highest element first).
        let idx = _mm256_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // A zero mask must zero every lane.
        let r = _mm256_maskz_permutexvar_epi8(0, idx, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // An all-ones mask keeps the full permute result.
        let r = _mm256_maskz_permutexvar_epi8(0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm256_set1_epi8(30);
        assert_eq_m256i(r, e);
    }
783
784 #[simd_test(enable = "avx512vbmi,avx512vl")]
785 unsafe fn test_mm_permutexvar_epi8() {
786 let idx = _mm_set1_epi8(1);
787 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
788 let r = _mm_permutexvar_epi8(idx, a);
789 let e = _mm_set1_epi8(14);
790 assert_eq_m128i(r, e);
791 }
792
793 #[simd_test(enable = "avx512vbmi,avx512vl")]
794 unsafe fn test_mm_mask_permutexvar_epi8() {
795 let idx = _mm_set1_epi8(1);
796 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
797 let r = _mm_mask_permutexvar_epi8(a, 0, idx, a);
798 assert_eq_m128i(r, a);
799 let r = _mm_mask_permutexvar_epi8(a, 0b11111111_11111111, idx, a);
800 let e = _mm_set1_epi8(14);
801 assert_eq_m128i(r, e);
802 }
803
804 #[simd_test(enable = "avx512vbmi,avx512vl")]
805 unsafe fn test_mm_maskz_permutexvar_epi8() {
806 let idx = _mm_set1_epi8(1);
807 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
808 let r = _mm_maskz_permutexvar_epi8(0, idx, a);
809 assert_eq_m128i(r, _mm_setzero_si128());
810 let r = _mm_maskz_permutexvar_epi8(0b11111111_11111111, idx, a);
811 let e = _mm_set1_epi8(14);
812 assert_eq_m128i(r, e);
813 }
814
815 #[simd_test(enable = "avx512vbmi")]
816 unsafe fn test_mm512_multishift_epi64_epi8() {
817 let a = _mm512_set1_epi8(1);
818 let b = _mm512_set1_epi8(1);
819 let r = _mm512_multishift_epi64_epi8(a, b);
820 let e = _mm512_set1_epi8(1 << 7);
821 assert_eq_m512i(r, e);
822 }
823
824 #[simd_test(enable = "avx512vbmi")]
825 unsafe fn test_mm512_mask_multishift_epi64_epi8() {
826 let a = _mm512_set1_epi8(1);
827 let b = _mm512_set1_epi8(1);
828 let r = _mm512_mask_multishift_epi64_epi8(a, 0, a, b);
829 assert_eq_m512i(r, a);
830 let r = _mm512_mask_multishift_epi64_epi8(
831 a,
832 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
833 a,
834 b,
835 );
836 let e = _mm512_set1_epi8(1 << 7);
837 assert_eq_m512i(r, e);
838 }
839
840 #[simd_test(enable = "avx512vbmi")]
841 unsafe fn test_mm512_maskz_multishift_epi64_epi8() {
842 let a = _mm512_set1_epi8(1);
843 let b = _mm512_set1_epi8(1);
844 let r = _mm512_maskz_multishift_epi64_epi8(0, a, b);
845 assert_eq_m512i(r, _mm512_setzero_si512());
846 let r = _mm512_maskz_multishift_epi64_epi8(
847 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
848 a,
849 b,
850 );
851 let e = _mm512_set1_epi8(1 << 7);
852 assert_eq_m512i(r, e);
853 }
854
855 #[simd_test(enable = "avx512vbmi,avx512vl")]
856 unsafe fn test_mm256_multishift_epi64_epi8() {
857 let a = _mm256_set1_epi8(1);
858 let b = _mm256_set1_epi8(1);
859 let r = _mm256_multishift_epi64_epi8(a, b);
860 let e = _mm256_set1_epi8(1 << 7);
861 assert_eq_m256i(r, e);
862 }
863
864 #[simd_test(enable = "avx512vbmi,avx512vl")]
865 unsafe fn test_mm256_mask_multishift_epi64_epi8() {
866 let a = _mm256_set1_epi8(1);
867 let b = _mm256_set1_epi8(1);
868 let r = _mm256_mask_multishift_epi64_epi8(a, 0, a, b);
869 assert_eq_m256i(r, a);
870 let r = _mm256_mask_multishift_epi64_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
871 let e = _mm256_set1_epi8(1 << 7);
872 assert_eq_m256i(r, e);
873 }
874
875 #[simd_test(enable = "avx512vbmi,avx512vl")]
876 unsafe fn test_mm256_maskz_multishift_epi64_epi8() {
877 let a = _mm256_set1_epi8(1);
878 let b = _mm256_set1_epi8(1);
879 let r = _mm256_maskz_multishift_epi64_epi8(0, a, b);
880 assert_eq_m256i(r, _mm256_setzero_si256());
881 let r = _mm256_maskz_multishift_epi64_epi8(0b11111111_11111111_11111111_11111111, a, b);
882 let e = _mm256_set1_epi8(1 << 7);
883 assert_eq_m256i(r, e);
884 }
885
886 #[simd_test(enable = "avx512vbmi,avx512vl")]
887 unsafe fn test_mm_multishift_epi64_epi8() {
888 let a = _mm_set1_epi8(1);
889 let b = _mm_set1_epi8(1);
890 let r = _mm_multishift_epi64_epi8(a, b);
891 let e = _mm_set1_epi8(1 << 7);
892 assert_eq_m128i(r, e);
893 }
894
895 #[simd_test(enable = "avx512vbmi,avx512vl")]
896 unsafe fn test_mm_mask_multishift_epi64_epi8() {
897 let a = _mm_set1_epi8(1);
898 let b = _mm_set1_epi8(1);
899 let r = _mm_mask_multishift_epi64_epi8(a, 0, a, b);
900 assert_eq_m128i(r, a);
901 let r = _mm_mask_multishift_epi64_epi8(a, 0b11111111_11111111, a, b);
902 let e = _mm_set1_epi8(1 << 7);
903 assert_eq_m128i(r, e);
904 }
905
906 #[simd_test(enable = "avx512vbmi,avx512vl")]
907 unsafe fn test_mm_maskz_multishift_epi64_epi8() {
908 let a = _mm_set1_epi8(1);
909 let b = _mm_set1_epi8(1);
910 let r = _mm_maskz_multishift_epi64_epi8(0, a, b);
911 assert_eq_m128i(r, _mm_setzero_si128());
912 let r = _mm_maskz_multishift_epi64_epi8(0b11111111_11111111, a, b);
913 let e = _mm_set1_epi8(1 << 7);
914 assert_eq_m128i(r, e);
915 }
916}
917