use crate::core_arch::{simd::*, simd_llvm::*, x86::*};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi8&expand=4262)
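///
/// # Example
///
/// A minimal illustrative sketch, not compiled as a doctest (it assumes a CPU and
/// toolchain with AVX512VBMI support). Index bytes 0..=63 select a lane of `a`;
/// setting bit 6 (adding 64) selects the same lane of `b` instead:
///
/// ```ignore
/// let a = _mm512_set1_epi8(11);
/// let b = _mm512_set1_epi8(22);
/// let idx = _mm512_set1_epi8(1 << 6); // every output lane reads lane 0 of `b`
/// let r = _mm512_permutex2var_epi8(a, idx, b);
/// // r now equals _mm512_set1_epi8(22)
/// ```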
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm512_permutex2var_epi8(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    transmute(vpermi2b(a.as_i8x64(), idx.as_i8x64(), b.as_i8x64()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi8&expand=4259)
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[cfg_attr(test, assert_instr(vpermt2b))]
pub unsafe fn _mm512_mask_permutex2var_epi8(
    a: __m512i,
    k: __mmask64,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    let permute: i8x64 = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
    transmute(simd_select_bitmask(k, permute, a.as_i8x64()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi8&expand=4261)
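///
/// # Example
///
/// A minimal illustrative sketch, not compiled as a doctest (it assumes AVX512VBMI
/// support). Lanes whose mask bit is clear are zeroed instead of shuffled:
///
/// ```ignore
/// let a = _mm512_set1_epi8(11);
/// let b = _mm512_set1_epi8(22);
/// let idx = _mm512_set1_epi8(0);
/// let r = _mm512_maskz_permutex2var_epi8(0, a, idx, b); // all-zero mask
/// // r now equals _mm512_setzero_si512()
/// ```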
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm512_maskz_permutex2var_epi8(
    k: __mmask64,
    a: __m512i,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    let permute: i8x64 = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
    let zero: i8x64 = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi8&expand=4260)
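///
/// # Example
///
/// A minimal illustrative sketch, not compiled as a doctest (it assumes AVX512VBMI
/// support). Unlike the `_mask_` form, lanes whose mask bit is clear keep the value
/// from `idx` rather than from `a`:
///
/// ```ignore
/// let a = _mm512_set1_epi8(11);
/// let b = _mm512_set1_epi8(22);
/// let idx = _mm512_set1_epi8(1);
/// let r = _mm512_mask2_permutex2var_epi8(a, idx, 0, b); // all-zero mask
/// // r is now a copy of idx
/// ```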
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[cfg_attr(test, assert_instr(vpermi2b))]
pub unsafe fn _mm512_mask2_permutex2var_epi8(
    a: __m512i,
    idx: __m512i,
    k: __mmask64,
    b: __m512i,
) -> __m512i {
    let permute: i8x64 = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
    transmute(simd_select_bitmask(k, permute, idx.as_i8x64()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi8&expand=4258)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm256_permutex2var_epi8(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    transmute(vpermi2b256(a.as_i8x32(), idx.as_i8x32(), b.as_i8x32()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi8&expand=4255)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpermt2b))]
pub unsafe fn _mm256_mask_permutex2var_epi8(
    a: __m256i,
    k: __mmask32,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    let permute: i8x32 = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
    transmute(simd_select_bitmask(k, permute, a.as_i8x32()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi8&expand=4257)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm256_maskz_permutex2var_epi8(
    k: __mmask32,
    a: __m256i,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    let permute: i8x32 = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
    let zero: i8x32 = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi8&expand=4256)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpermi2b))]
pub unsafe fn _mm256_mask2_permutex2var_epi8(
    a: __m256i,
    idx: __m256i,
    k: __mmask32,
    b: __m256i,
) -> __m256i {
    let permute: i8x32 = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
    transmute(simd_select_bitmask(k, permute, idx.as_i8x32()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi8&expand=4254)
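///
/// # Example
///
/// A minimal illustrative sketch, not compiled as a doctest (it assumes AVX512VBMI
/// and AVX512VL support). With 16-byte vectors only the low 5 index bits matter:
/// values 0..=15 pick a lane of `a`, 16..=31 pick a lane of `b`:
///
/// ```ignore
/// let a = _mm_set1_epi8(11);
/// let b = _mm_set1_epi8(22);
/// let idx = _mm_set1_epi8(1 << 4); // every output lane reads lane 0 of `b`
/// let r = _mm_permutex2var_epi8(a, idx, b);
/// // r now equals _mm_set1_epi8(22)
/// ```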
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm_permutex2var_epi8(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    transmute(vpermi2b128(a.as_i8x16(), idx.as_i8x16(), b.as_i8x16()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi8&expand=4251)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpermt2b))]
pub unsafe fn _mm_mask_permutex2var_epi8(
    a: __m128i,
    k: __mmask16,
    idx: __m128i,
    b: __m128i,
) -> __m128i {
    let permute: i8x16 = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
    transmute(simd_select_bitmask(k, permute, a.as_i8x16()))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi8&expand=4253)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm_maskz_permutex2var_epi8(
    k: __mmask16,
    a: __m128i,
    idx: __m128i,
    b: __m128i,
) -> __m128i {
    let permute: i8x16 = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
    let zero: i8x16 = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi8&expand=4252)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpermi2b))]
pub unsafe fn _mm_mask2_permutex2var_epi8(
    a: __m128i,
    idx: __m128i,
    k: __mmask16,
    b: __m128i,
) -> __m128i {
    let permute: i8x16 = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
    transmute(simd_select_bitmask(k, permute, idx.as_i8x16()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi8&expand=4316)
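///
/// # Example
///
/// A minimal illustrative sketch, not compiled as a doctest (it assumes AVX512VBMI
/// support). Each index byte, taken modulo 64, picks the lane of `a` that is copied
/// into the corresponding output lane, so a constant index broadcasts a single byte:
///
/// ```ignore
/// let a = _mm512_set1_epi8(42);
/// let idx = _mm512_set1_epi8(3); // broadcast lane 3 of `a` to every lane
/// let r = _mm512_permutexvar_epi8(idx, a);
/// // r now equals _mm512_set1_epi8(42)
/// ```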
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm512_permutexvar_epi8(idx: __m512i, a: __m512i) -> __m512i {
    transmute(vpermb(a.as_i8x64(), idx.as_i8x64()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi8&expand=4314)
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm512_mask_permutexvar_epi8(
    src: __m512i,
    k: __mmask64,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    let permute: i8x64 = _mm512_permutexvar_epi8(idx, a).as_i8x64();
    transmute(simd_select_bitmask(k, permute, src.as_i8x64()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi8&expand=4315)
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm512_maskz_permutexvar_epi8(k: __mmask64, idx: __m512i, a: __m512i) -> __m512i {
    let permute: i8x64 = _mm512_permutexvar_epi8(idx, a).as_i8x64();
    let zero: i8x64 = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi8&expand=4313)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm256_permutexvar_epi8(idx: __m256i, a: __m256i) -> __m256i {
    transmute(vpermb256(a.as_i8x32(), idx.as_i8x32()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi8&expand=4311)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm256_mask_permutexvar_epi8(
    src: __m256i,
    k: __mmask32,
    idx: __m256i,
    a: __m256i,
) -> __m256i {
    let permute: i8x32 = _mm256_permutexvar_epi8(idx, a).as_i8x32();
    transmute(simd_select_bitmask(k, permute, src.as_i8x32()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi8&expand=4312)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm256_maskz_permutexvar_epi8(k: __mmask32, idx: __m256i, a: __m256i) -> __m256i {
    let permute: i8x32 = _mm256_permutexvar_epi8(idx, a).as_i8x32();
    let zero: i8x32 = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi8&expand=4310)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm_permutexvar_epi8(idx: __m128i, a: __m128i) -> __m128i {
    transmute(vpermb128(a.as_i8x16(), idx.as_i8x16()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi8&expand=4308)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm_mask_permutexvar_epi8(
    src: __m128i,
    k: __mmask16,
    idx: __m128i,
    a: __m128i,
) -> __m128i {
    let permute: i8x16 = _mm_permutexvar_epi8(idx, a).as_i8x16();
    transmute(simd_select_bitmask(k, permute, src.as_i8x16()))
}

/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi8&expand=4309)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm_maskz_permutexvar_epi8(k: __mmask16, idx: __m128i, a: __m128i) -> __m128i {
    let permute: i8x16 = _mm_permutexvar_epi8(idx, a).as_i8x16();
    let zero: i8x16 = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_multishift_epi64_epi8&expand=4026)
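///
/// # Example
///
/// A minimal illustrative sketch, not compiled as a doctest (it assumes AVX512VBMI
/// support). Each control byte in `a`, taken modulo 64, gives a bit offset into the
/// matching 64-bit lane of `b`; 8 bits are read from that offset, wrapping around
/// within the lane:
///
/// ```ignore
/// let a = _mm512_set1_epi8(1); // read every byte field starting one bit in
/// let b = _mm512_set1_epi8(1); // every 64-bit lane is 0x0101_0101_0101_0101
/// let r = _mm512_multishift_epi64_epi8(a, b);
/// // each extracted field is 0b1000_0000, so r equals _mm512_set1_epi8(1 << 7)
/// ```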
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm512_multishift_epi64_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmultishiftqb(a.as_i8x64(), b.as_i8x64()))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_multishift_epi64_epi8&expand=4024)
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm512_mask_multishift_epi64_epi8(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let multishift: i8x64 = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, multishift, src.as_i8x64()))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_multishift_epi64_epi8&expand=4025)
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm512_maskz_multishift_epi64_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let multishift: i8x64 = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
    let zero: i8x64 = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, multishift, zero))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_multishift_epi64_epi8&expand=4023)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm256_multishift_epi64_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(vpmultishiftqb256(a.as_i8x32(), b.as_i8x32()))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_multishift_epi64_epi8&expand=4021)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm256_mask_multishift_epi64_epi8(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let multishift: i8x32 = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, multishift, src.as_i8x32()))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_multishift_epi64_epi8&expand=4022)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm256_maskz_multishift_epi64_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let multishift: i8x32 = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
    let zero: i8x32 = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, multishift, zero))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm_multishift_epi64_epi8&expand=4020)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm_multishift_epi64_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(vpmultishiftqb128(a.as_i8x16(), b.as_i8x16()))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_multishift_epi64_epi8&expand=4018)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm_mask_multishift_epi64_epi8(
    src: __m128i,
    k: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    let multishift: i8x16 = _mm_multishift_epi64_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, multishift, src.as_i8x16()))
}

/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_multishift_epi64_epi8&expand=4019)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm_maskz_multishift_epi64_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let multishift: i8x16 = _mm_multishift_epi64_epi8(a, b).as_i8x16();
    let zero: i8x16 = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, multishift, zero))
}

#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.avx512.vpermi2var.qi.512"]
    fn vpermi2b(a: i8x64, idx: i8x64, b: i8x64) -> i8x64;
    #[link_name = "llvm.x86.avx512.vpermi2var.qi.256"]
    fn vpermi2b256(a: i8x32, idx: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx512.vpermi2var.qi.128"]
    fn vpermi2b128(a: i8x16, idx: i8x16, b: i8x16) -> i8x16;

    #[link_name = "llvm.x86.avx512.permvar.qi.512"]
    fn vpermb(a: i8x64, idx: i8x64) -> i8x64;
    #[link_name = "llvm.x86.avx512.permvar.qi.256"]
    fn vpermb256(a: i8x32, idx: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx512.permvar.qi.128"]
    fn vpermb128(a: i8x16, idx: i8x16) -> i8x16;

    #[link_name = "llvm.x86.avx512.pmultishift.qb.512"]
    fn vpmultishiftqb(a: i8x64, b: i8x64) -> i8x64;
    #[link_name = "llvm.x86.avx512.pmultishift.qb.256"]
    fn vpmultishiftqb256(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx512.pmultishift.qb.128"]
    fn vpmultishiftqb128(a: i8x16, b: i8x16) -> i8x16;
}

#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        #[rustfmt::skip]
        let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6,
                                  9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
        let b = _mm512_set1_epi8(100);
        let r = _mm512_permutex2var_epi8(a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_mask_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        #[rustfmt::skip]
        let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6,
                                  9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
        let b = _mm512_set1_epi8(100);
        let r = _mm512_mask_permutex2var_epi8(a, 0, idx, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutex2var_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            idx,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_maskz_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        #[rustfmt::skip]
        let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6,
                                  9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
        let b = _mm512_set1_epi8(100);
        let r = _mm512_maskz_permutex2var_epi8(0, a, idx, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutex2var_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            idx,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_mask2_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        #[rustfmt::skip]
        let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6,
                                  9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
        let b = _mm512_set1_epi8(100);
        let r = _mm512_mask2_permutex2var_epi8(a, idx, 0, b);
        assert_eq_m512i(r, idx);
        let r = _mm512_mask2_permutex2var_epi8(
            a,
            idx,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                  9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm256_set1_epi8(100);
        let r = _mm256_permutex2var_epi8(a, idx, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_mask_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                  9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm256_set1_epi8(100);
        let r = _mm256_mask_permutex2var_epi8(a, 0, idx, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutex2var_epi8(a, 0b11111111_11111111_11111111_11111111, idx, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_maskz_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                  9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm256_set1_epi8(100);
        let r = _mm256_maskz_permutex2var_epi8(0, a, idx, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutex2var_epi8(0b11111111_11111111_11111111_11111111, a, idx, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_mask2_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                  9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm256_set1_epi8(100);
        let r = _mm256_mask2_permutex2var_epi8(a, idx, 0, b);
        assert_eq_m256i(r, idx);
        let r = _mm256_mask2_permutex2var_epi8(a, idx, 0b11111111_11111111_11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_permutex2var_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
        let b = _mm_set1_epi8(100);
        let r = _mm_permutex2var_epi8(a, idx, b);
        let e = _mm_set_epi8(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_mask_permutex2var_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
        let b = _mm_set1_epi8(100);
        let r = _mm_mask_permutex2var_epi8(a, 0, idx, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutex2var_epi8(a, 0b11111111_11111111, idx, b);
        let e = _mm_set_epi8(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_maskz_permutex2var_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
        let b = _mm_set1_epi8(100);
        let r = _mm_maskz_permutex2var_epi8(0, a, idx, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutex2var_epi8(0b11111111_11111111, a, idx, b);
        let e = _mm_set_epi8(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_mask2_permutex2var_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
        let b = _mm_set1_epi8(100);
        let r = _mm_mask2_permutex2var_epi8(a, idx, 0, b);
        assert_eq_m128i(r, idx);
        let r = _mm_mask2_permutex2var_epi8(a, idx, 0b11111111_11111111, b);
        let e = _mm_set_epi8(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_permutexvar_epi8() {
        let idx = _mm512_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let r = _mm512_permutexvar_epi8(idx, a);
        let e = _mm512_set1_epi8(62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_mask_permutexvar_epi8() {
        let idx = _mm512_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let r = _mm512_mask_permutexvar_epi8(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutexvar_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            idx,
            a,
        );
        let e = _mm512_set1_epi8(62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_maskz_permutexvar_epi8() {
        let idx = _mm512_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let r = _mm512_maskz_permutexvar_epi8(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutexvar_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            idx,
            a,
        );
        let e = _mm512_set1_epi8(62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_permutexvar_epi8() {
        let idx = _mm256_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm256_permutexvar_epi8(idx, a);
        let e = _mm256_set1_epi8(30);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_mask_permutexvar_epi8() {
        let idx = _mm256_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm256_mask_permutexvar_epi8(a, 0, idx, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutexvar_epi8(a, 0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm256_set1_epi8(30);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_maskz_permutexvar_epi8() {
        let idx = _mm256_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm256_maskz_permutexvar_epi8(0, idx, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutexvar_epi8(0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm256_set1_epi8(30);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_permutexvar_epi8() {
        let idx = _mm_set1_epi8(1);
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_permutexvar_epi8(idx, a);
        let e = _mm_set1_epi8(14);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_mask_permutexvar_epi8() {
        let idx = _mm_set1_epi8(1);
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_mask_permutexvar_epi8(a, 0, idx, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutexvar_epi8(a, 0b11111111_11111111, idx, a);
        let e = _mm_set1_epi8(14);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_maskz_permutexvar_epi8() {
        let idx = _mm_set1_epi8(1);
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_maskz_permutexvar_epi8(0, idx, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutexvar_epi8(0b11111111_11111111, idx, a);
        let e = _mm_set1_epi8(14);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_multishift_epi64_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_multishift_epi64_epi8(a, b);
        let e = _mm512_set1_epi8(1 << 7);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_mask_multishift_epi64_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_multishift_epi64_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_multishift_epi64_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e = _mm512_set1_epi8(1 << 7);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_maskz_multishift_epi64_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_multishift_epi64_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_multishift_epi64_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e = _mm512_set1_epi8(1 << 7);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_multishift_epi64_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_multishift_epi64_epi8(a, b);
        let e = _mm256_set1_epi8(1 << 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_mask_multishift_epi64_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_multishift_epi64_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_multishift_epi64_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm256_set1_epi8(1 << 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_maskz_multishift_epi64_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_multishift_epi64_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_multishift_epi64_epi8(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm256_set1_epi8(1 << 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_multishift_epi64_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_multishift_epi64_epi8(a, b);
        let e = _mm_set1_epi8(1 << 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_mask_multishift_epi64_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_multishift_epi64_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_multishift_epi64_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set1_epi8(1 << 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_maskz_multishift_epi64_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_multishift_epi64_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_multishift_epi64_epi8(0b11111111_11111111, a, b);
        let e = _mm_set1_epi8(1 << 7);
        assert_eq_m128i(r, e);
    }
}