1 | use crate::core_arch::{simd::*, x86::*}; |
2 | use crate::intrinsics::simd::*; |
3 | |
#[cfg(test)]
5 | use stdarch_test::assert_instr; |
6 | |
7 | /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. |
8 | /// |
9 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi8&expand=4262) |
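///
/// A minimal sketch of the index semantics (marked `ignore` because the
/// intrinsic is nightly-only): for 64-byte vectors, bit 6 of each index byte
/// selects the source (0 = a, 1 = b) and the low 6 bits select the byte
/// within that source.
///
/// ```ignore
/// // Hypothetical usage; assumes #![feature(stdarch_x86_avx512)] and
/// // AVX512VBMI support at runtime.
/// let a = _mm512_set1_epi8(10);
/// let b = _mm512_set1_epi8(20);
/// let idx = _mm512_set1_epi8(1 << 6); // bit 6 set: take byte 0 of b
/// let r = _mm512_permutex2var_epi8(a, idx, b);
/// // every byte of r is 20
/// ```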
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm512_permutex2var_epi8(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
transmute(vpermi2b(a.as_i8x64(), idx.as_i8x64(), b.as_i8x64()))
16 | } |
17 | |
18 | /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
19 | /// |
20 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi8&expand=4259) |
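///
/// A minimal sketch (nightly-only, hence `ignore`) showing that mask bit i
/// controls destination element i, with bit 0 mapping to the least
/// significant byte:
///
/// ```ignore
/// // Hypothetical usage; assumes #![feature(stdarch_x86_avx512)] and
/// // AVX512VBMI support at runtime.
/// let a = _mm512_set1_epi8(10);
/// let b = _mm512_set1_epi8(20);
/// let idx = _mm512_set1_epi8(1 << 6); // always select byte 0 of b
/// // The low 32 elements take the permute result (20); the rest keep a (10).
/// let r = _mm512_mask_permutex2var_epi8(a, 0x0000_0000_ffff_ffff, idx, b);
/// ```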
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermt2b))]
25 | pub unsafe fn _mm512_mask_permutex2var_epi8( |
26 | a: __m512i, |
27 | k: __mmask64, |
28 | idx: __m512i, |
29 | b: __m512i, |
30 | ) -> __m512i { |
31 | let permute: i8x64 = _mm512_permutex2var_epi8(a, idx, b).as_i8x64(); |
transmute(simd_select_bitmask(k, permute, a.as_i8x64()))
33 | } |
34 | |
35 | /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
36 | /// |
37 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi8&expand=4261) |
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
42 | pub unsafe fn _mm512_maskz_permutex2var_epi8( |
43 | k: __mmask64, |
44 | a: __m512i, |
45 | idx: __m512i, |
46 | b: __m512i, |
47 | ) -> __m512i { |
48 | let permute: i8x64 = _mm512_permutex2var_epi8(a, idx, b).as_i8x64(); |
49 | let zero: i8x64 = _mm512_setzero_si512().as_i8x64(); |
transmute(simd_select_bitmask(k, permute, zero))
51 | } |
52 | |
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
54 | /// |
55 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi8&expand=4260) |
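///
/// Note the fallback source: unlike _mm512_mask_permutex2var_epi8, elements
/// whose mask bit is clear are taken from idx, not from a. A minimal sketch
/// (nightly-only, hence `ignore`):
///
/// ```ignore
/// // Hypothetical usage; assumes #![feature(stdarch_x86_avx512)] and
/// // AVX512VBMI support at runtime.
/// let a = _mm512_set1_epi8(10);
/// let b = _mm512_set1_epi8(20);
/// let idx = _mm512_set1_epi8(1 << 6);
/// let r = _mm512_mask2_permutex2var_epi8(a, idx, 0, b);
/// // With an all-zero mask, r == idx.
/// ```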
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermi2b))]
60 | pub unsafe fn _mm512_mask2_permutex2var_epi8( |
61 | a: __m512i, |
62 | idx: __m512i, |
63 | k: __mmask64, |
64 | b: __m512i, |
65 | ) -> __m512i { |
66 | let permute: i8x64 = _mm512_permutex2var_epi8(a, idx, b).as_i8x64(); |
transmute(simd_select_bitmask(k, permute, idx.as_i8x64()))
68 | } |
69 | |
70 | /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. |
71 | /// |
72 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi8&expand=4258) |
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm256_permutex2var_epi8(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
transmute(vpermi2b256(a.as_i8x32(), idx.as_i8x32(), b.as_i8x32()))
79 | } |
80 | |
81 | /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
82 | /// |
83 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi8&expand=4255) |
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermt2b))]
88 | pub unsafe fn _mm256_mask_permutex2var_epi8( |
89 | a: __m256i, |
90 | k: __mmask32, |
91 | idx: __m256i, |
92 | b: __m256i, |
93 | ) -> __m256i { |
94 | let permute: i8x32 = _mm256_permutex2var_epi8(a, idx, b).as_i8x32(); |
transmute(simd_select_bitmask(k, permute, a.as_i8x32()))
96 | } |
97 | |
98 | /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
99 | /// |
100 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi8&expand=4257) |
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
105 | pub unsafe fn _mm256_maskz_permutex2var_epi8( |
106 | k: __mmask32, |
107 | a: __m256i, |
108 | idx: __m256i, |
109 | b: __m256i, |
110 | ) -> __m256i { |
111 | let permute: i8x32 = _mm256_permutex2var_epi8(a, idx, b).as_i8x32(); |
112 | let zero: i8x32 = _mm256_setzero_si256().as_i8x32(); |
transmute(simd_select_bitmask(k, permute, zero))
114 | } |
115 | |
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
117 | /// |
118 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi8&expand=4256) |
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermi2b))]
123 | pub unsafe fn _mm256_mask2_permutex2var_epi8( |
124 | a: __m256i, |
125 | idx: __m256i, |
126 | k: __mmask32, |
127 | b: __m256i, |
128 | ) -> __m256i { |
129 | let permute: i8x32 = _mm256_permutex2var_epi8(a, idx, b).as_i8x32(); |
transmute(simd_select_bitmask(k, permute, idx.as_i8x32()))
131 | } |
132 | |
133 | /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. |
134 | /// |
135 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi8&expand=4254) |
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
pub unsafe fn _mm_permutex2var_epi8(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
transmute(vpermi2b128(a.as_i8x16(), idx.as_i8x16(), b.as_i8x16()))
142 | } |
143 | |
144 | /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
145 | /// |
146 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi8&expand=4251) |
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermt2b))]
151 | pub unsafe fn _mm_mask_permutex2var_epi8( |
152 | a: __m128i, |
153 | k: __mmask16, |
154 | idx: __m128i, |
155 | b: __m128i, |
156 | ) -> __m128i { |
157 | let permute: i8x16 = _mm_permutex2var_epi8(a, idx, b).as_i8x16(); |
transmute(simd_select_bitmask(k, permute, a.as_i8x16()))
159 | } |
160 | |
161 | /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
162 | /// |
163 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi8&expand=4253) |
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2b
168 | pub unsafe fn _mm_maskz_permutex2var_epi8( |
169 | k: __mmask16, |
170 | a: __m128i, |
171 | idx: __m128i, |
172 | b: __m128i, |
173 | ) -> __m128i { |
174 | let permute: i8x16 = _mm_permutex2var_epi8(a, idx, b).as_i8x16(); |
175 | let zero: i8x16 = _mm_setzero_si128().as_i8x16(); |
transmute(simd_select_bitmask(k, permute, zero))
177 | } |
178 | |
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
180 | /// |
181 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi8&expand=4252) |
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermi2b))]
186 | pub unsafe fn _mm_mask2_permutex2var_epi8( |
187 | a: __m128i, |
188 | idx: __m128i, |
189 | k: __mmask16, |
190 | b: __m128i, |
191 | ) -> __m128i { |
192 | let permute: i8x16 = _mm_permutex2var_epi8(a, idx, b).as_i8x16(); |
transmute(simd_select_bitmask(k, permute, idx.as_i8x16()))
194 | } |
195 | |
196 | /// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. |
197 | /// |
198 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi8&expand=4316) |
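///
/// A minimal sketch (nightly-only, hence `ignore`): only the low 6 bits of
/// each index byte are used, so a constant index broadcasts a single byte
/// of a:
///
/// ```ignore
/// // Hypothetical usage; assumes #![feature(stdarch_x86_avx512)], AVX512BW
/// // for the unaligned byte load, and AVX512VBMI support at runtime.
/// let bytes: [i8; 64] = core::array::from_fn(|i| i as i8);
/// let a = _mm512_loadu_epi8(bytes.as_ptr());
/// let idx = _mm512_set1_epi8(3); // every destination lane reads element 3
/// let r = _mm512_permutexvar_epi8(idx, a);
/// // r == _mm512_set1_epi8(3)
/// ```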
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm512_permutexvar_epi8(idx: __m512i, a: __m512i) -> __m512i {
transmute(vpermb(a.as_i8x64(), idx.as_i8x64()))
205 | } |
206 | |
207 | /// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
208 | /// |
209 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi8&expand=4314) |
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
214 | pub unsafe fn _mm512_mask_permutexvar_epi8( |
215 | src: __m512i, |
216 | k: __mmask64, |
217 | idx: __m512i, |
218 | a: __m512i, |
219 | ) -> __m512i { |
220 | let permute: i8x64 = _mm512_permutexvar_epi8(idx, a).as_i8x64(); |
transmute(simd_select_bitmask(k, permute, src.as_i8x64()))
222 | } |
223 | |
224 | /// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
225 | /// |
226 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi8&expand=4315) |
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
231 | pub unsafe fn _mm512_maskz_permutexvar_epi8(k: __mmask64, idx: __m512i, a: __m512i) -> __m512i { |
232 | let permute: i8x64 = _mm512_permutexvar_epi8(idx, a).as_i8x64(); |
233 | let zero: i8x64 = _mm512_setzero_si512().as_i8x64(); |
transmute(simd_select_bitmask(k, permute, zero))
235 | } |
236 | |
237 | /// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. |
238 | /// |
239 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi8&expand=4313) |
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm256_permutexvar_epi8(idx: __m256i, a: __m256i) -> __m256i {
transmute(vpermb256(a.as_i8x32(), idx.as_i8x32()))
246 | } |
247 | |
248 | /// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
249 | /// |
250 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi8&expand=4311) |
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
255 | pub unsafe fn _mm256_mask_permutexvar_epi8( |
256 | src: __m256i, |
257 | k: __mmask32, |
258 | idx: __m256i, |
259 | a: __m256i, |
260 | ) -> __m256i { |
261 | let permute: i8x32 = _mm256_permutexvar_epi8(idx, a).as_i8x32(); |
transmute(simd_select_bitmask(k, permute, src.as_i8x32()))
263 | } |
264 | |
265 | /// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
266 | /// |
267 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi8&expand=4312) |
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
272 | pub unsafe fn _mm256_maskz_permutexvar_epi8(k: __mmask32, idx: __m256i, a: __m256i) -> __m256i { |
273 | let permute: i8x32 = _mm256_permutexvar_epi8(idx, a).as_i8x32(); |
274 | let zero: i8x32 = _mm256_setzero_si256().as_i8x32(); |
transmute(simd_select_bitmask(k, permute, zero))
276 | } |
277 | |
278 | /// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. |
279 | /// |
280 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi8&expand=4310) |
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm_permutexvar_epi8(idx: __m128i, a: __m128i) -> __m128i {
transmute(vpermb128(a.as_i8x16(), idx.as_i8x16()))
287 | } |
288 | |
289 | /// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
290 | /// |
291 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi8&expand=4308) |
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
296 | pub unsafe fn _mm_mask_permutexvar_epi8( |
297 | src: __m128i, |
298 | k: __mmask16, |
299 | idx: __m128i, |
300 | a: __m128i, |
301 | ) -> __m128i { |
302 | let permute: i8x16 = _mm_permutexvar_epi8(idx, a).as_i8x16(); |
transmute(simd_select_bitmask(k, permute, src.as_i8x16()))
304 | } |
305 | |
306 | /// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
307 | /// |
308 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi8&expand=4309) |
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
313 | pub unsafe fn _mm_maskz_permutexvar_epi8(k: __mmask16, idx: __m128i, a: __m128i) -> __m128i { |
314 | let permute: i8x16 = _mm_permutexvar_epi8(idx, a).as_i8x16(); |
315 | let zero: i8x16 = _mm_setzero_si128().as_i8x16(); |
transmute(simd_select_bitmask(k, permute, zero))
317 | } |
318 | |
319 | /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst. |
320 | /// |
321 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_multishift_epi64_epi8&expand=4026) |
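///
/// A minimal sketch (nightly-only, hence `ignore`): each control byte in a
/// picks a bit offset (modulo 64) within the corresponding 64-bit lane of b,
/// and the 8 bits starting there (wrapping around the lane) form one
/// destination byte:
///
/// ```ignore
/// // Hypothetical usage; assumes #![feature(stdarch_x86_avx512)] and
/// // AVX512VBMI support at runtime.
/// let ctrl = _mm512_set1_epi8(4); // start each byte at bit offset 4
/// let data = _mm512_set1_epi64(0x0123_4567_89ab_cdef);
/// let r = _mm512_multishift_epi64_epi8(ctrl, data);
/// // Each byte of r is bits [11:4] of its lane, i.e. 0xde.
/// ```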
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm512_multishift_epi64_epi8(a: __m512i, b: __m512i) -> __m512i {
transmute(vpmultishiftqb(a.as_i8x64(), b.as_i8x64()))
328 | } |
329 | |
330 | /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
331 | /// |
332 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_multishift_epi64_epi8&expand=4024) |
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
337 | pub unsafe fn _mm512_mask_multishift_epi64_epi8( |
338 | src: __m512i, |
339 | k: __mmask64, |
340 | a: __m512i, |
341 | b: __m512i, |
342 | ) -> __m512i { |
343 | let multishift: i8x64 = _mm512_multishift_epi64_epi8(a, b).as_i8x64(); |
transmute(simd_select_bitmask(k, multishift, src.as_i8x64()))
345 | } |
346 | |
347 | /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
348 | /// |
349 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_multishift_epi64_epi8&expand=4025) |
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
354 | pub unsafe fn _mm512_maskz_multishift_epi64_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
355 | let multishift: i8x64 = _mm512_multishift_epi64_epi8(a, b).as_i8x64(); |
356 | let zero: i8x64 = _mm512_setzero_si512().as_i8x64(); |
transmute(simd_select_bitmask(k, multishift, zero))
358 | } |
359 | |
360 | /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst. |
361 | /// |
362 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_multishift_epi64_epi8&expand=4023) |
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm256_multishift_epi64_epi8(a: __m256i, b: __m256i) -> __m256i {
transmute(vpmultishiftqb256(a.as_i8x32(), b.as_i8x32()))
369 | } |
370 | |
371 | /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
372 | /// |
373 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_multishift_epi64_epi8&expand=4021) |
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
378 | pub unsafe fn _mm256_mask_multishift_epi64_epi8( |
379 | src: __m256i, |
380 | k: __mmask32, |
381 | a: __m256i, |
382 | b: __m256i, |
383 | ) -> __m256i { |
384 | let multishift: i8x32 = _mm256_multishift_epi64_epi8(a, b).as_i8x32(); |
transmute(simd_select_bitmask(k, multishift, src.as_i8x32()))
386 | } |
387 | |
388 | /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
389 | /// |
390 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_multishift_epi64_epi8&expand=4022) |
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
395 | pub unsafe fn _mm256_maskz_multishift_epi64_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
396 | let multishift: i8x32 = _mm256_multishift_epi64_epi8(a, b).as_i8x32(); |
397 | let zero: i8x32 = _mm256_setzero_si256().as_i8x32(); |
transmute(simd_select_bitmask(k, multishift, zero))
399 | } |
400 | |
401 | /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst. |
402 | /// |
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_multishift_epi64_epi8&expand=4020)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm_multishift_epi64_epi8(a: __m128i, b: __m128i) -> __m128i {
transmute(vpmultishiftqb128(a.as_i8x16(), b.as_i8x16()))
410 | } |
411 | |
412 | /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
413 | /// |
414 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_multishift_epi64_epi8&expand=4018) |
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
419 | pub unsafe fn _mm_mask_multishift_epi64_epi8( |
420 | src: __m128i, |
421 | k: __mmask16, |
422 | a: __m128i, |
423 | b: __m128i, |
424 | ) -> __m128i { |
425 | let multishift: i8x16 = _mm_multishift_epi64_epi8(a, b).as_i8x16(); |
transmute(simd_select_bitmask(k, multishift, src.as_i8x16()))
427 | } |
428 | |
429 | /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
430 | /// |
431 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_multishift_epi64_epi8&expand=4019) |
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
436 | pub unsafe fn _mm_maskz_multishift_epi64_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
437 | let multishift: i8x16 = _mm_multishift_epi64_epi8(a, b).as_i8x16(); |
438 | let zero: i8x16 = _mm_setzero_si128().as_i8x16(); |
transmute(simd_select_bitmask(k, multishift, zero))
440 | } |
441 | |
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.avx512.vpermi2var.qi.512"]
fn vpermi2b(a: i8x64, idx: i8x64, b: i8x64) -> i8x64;
#[link_name = "llvm.x86.avx512.vpermi2var.qi.256"]
fn vpermi2b256(a: i8x32, idx: i8x32, b: i8x32) -> i8x32;
#[link_name = "llvm.x86.avx512.vpermi2var.qi.128"]
fn vpermi2b128(a: i8x16, idx: i8x16, b: i8x16) -> i8x16;

#[link_name = "llvm.x86.avx512.permvar.qi.512"]
fn vpermb(a: i8x64, idx: i8x64) -> i8x64;
#[link_name = "llvm.x86.avx512.permvar.qi.256"]
fn vpermb256(a: i8x32, idx: i8x32) -> i8x32;
#[link_name = "llvm.x86.avx512.permvar.qi.128"]
fn vpermb128(a: i8x16, idx: i8x16) -> i8x16;

#[link_name = "llvm.x86.avx512.pmultishift.qb.512"]
fn vpmultishiftqb(a: i8x64, b: i8x64) -> i8x64;
#[link_name = "llvm.x86.avx512.pmultishift.qb.256"]
fn vpmultishiftqb256(a: i8x32, b: i8x32) -> i8x32;
#[link_name = "llvm.x86.avx512.pmultishift.qb.128"]
fn vpmultishiftqb128(a: i8x16, b: i8x16) -> i8x16;
}
464 | } |
465 | |
#[cfg(test)]
467 | mod tests { |
468 | |
469 | use stdarch_test::simd_test; |
470 | |
471 | use crate::core_arch::x86::*; |
472 | |
#[simd_test(enable = "avx512vbmi")]
474 | unsafe fn test_mm512_permutex2var_epi8() { |
475 | #[rustfmt::skip] |
476 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
477 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
478 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
479 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
480 | #[rustfmt::skip] |
481 | let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6, |
482 | 9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6, |
483 | 17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6, |
484 | 25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6); |
485 | let b = _mm512_set1_epi8(100); |
486 | let r = _mm512_permutex2var_epi8(a, idx, b); |
487 | #[rustfmt::skip] |
488 | let e = _mm512_set_epi8( |
489 | 62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100, |
490 | 54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100, |
491 | 46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100, |
492 | 38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100, |
493 | ); |
494 | assert_eq_m512i(r, e); |
495 | } |
496 | |
#[simd_test(enable = "avx512vbmi")]
498 | unsafe fn test_mm512_mask_permutex2var_epi8() { |
499 | #[rustfmt::skip] |
500 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
501 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
502 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
503 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
504 | #[rustfmt::skip] |
505 | let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6, |
506 | 9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6, |
507 | 17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6, |
508 | 25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6); |
509 | let b = _mm512_set1_epi8(100); |
510 | let r = _mm512_mask_permutex2var_epi8(a, 0, idx, b); |
511 | assert_eq_m512i(r, a); |
512 | let r = _mm512_mask_permutex2var_epi8( |
513 | a, |
514 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
515 | idx, |
516 | b, |
517 | ); |
518 | #[rustfmt::skip] |
519 | let e = _mm512_set_epi8( |
520 | 62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100, |
521 | 54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100, |
522 | 46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100, |
523 | 38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100, |
524 | ); |
525 | assert_eq_m512i(r, e); |
526 | } |
527 | |
#[simd_test(enable = "avx512vbmi")]
529 | unsafe fn test_mm512_maskz_permutex2var_epi8() { |
530 | #[rustfmt::skip] |
531 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
532 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
533 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
534 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
535 | #[rustfmt::skip] |
536 | let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6, |
537 | 9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6, |
538 | 17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6, |
539 | 25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6); |
540 | let b = _mm512_set1_epi8(100); |
541 | let r = _mm512_maskz_permutex2var_epi8(0, a, idx, b); |
542 | assert_eq_m512i(r, _mm512_setzero_si512()); |
543 | let r = _mm512_maskz_permutex2var_epi8( |
544 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
545 | a, |
546 | idx, |
547 | b, |
548 | ); |
549 | #[rustfmt::skip] |
550 | let e = _mm512_set_epi8( |
551 | 62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100, |
552 | 54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100, |
553 | 46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100, |
554 | 38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100, |
555 | ); |
556 | assert_eq_m512i(r, e); |
557 | } |
558 | |
#[simd_test(enable = "avx512vbmi")]
560 | unsafe fn test_mm512_mask2_permutex2var_epi8() { |
561 | #[rustfmt::skip] |
562 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
563 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
564 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
565 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
566 | #[rustfmt::skip] |
567 | let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6, |
568 | 9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6, |
569 | 17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6, |
570 | 25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6); |
571 | let b = _mm512_set1_epi8(100); |
572 | let r = _mm512_mask2_permutex2var_epi8(a, idx, 0, b); |
573 | assert_eq_m512i(r, idx); |
574 | let r = _mm512_mask2_permutex2var_epi8( |
575 | a, |
576 | idx, |
577 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
578 | b, |
579 | ); |
580 | #[rustfmt::skip] |
581 | let e = _mm512_set_epi8( |
582 | 62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100, |
583 | 54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100, |
584 | 46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100, |
585 | 38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100, |
586 | ); |
587 | assert_eq_m512i(r, e); |
588 | } |
589 | |
#[simd_test(enable = "avx512vbmi,avx512vl")]
591 | unsafe fn test_mm256_permutex2var_epi8() { |
592 | #[rustfmt::skip] |
593 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
594 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
595 | #[rustfmt::skip] |
596 | let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5, |
597 | 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5); |
598 | let b = _mm256_set1_epi8(100); |
599 | let r = _mm256_permutex2var_epi8(a, idx, b); |
600 | #[rustfmt::skip] |
601 | let e = _mm256_set_epi8( |
602 | 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100, |
603 | 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100, |
604 | ); |
605 | assert_eq_m256i(r, e); |
606 | } |
607 | |
#[simd_test(enable = "avx512vbmi,avx512vl")]
609 | unsafe fn test_mm256_mask_permutex2var_epi8() { |
610 | #[rustfmt::skip] |
611 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
612 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
613 | #[rustfmt::skip] |
614 | let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5, |
615 | 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5); |
616 | let b = _mm256_set1_epi8(100); |
617 | let r = _mm256_mask_permutex2var_epi8(a, 0, idx, b); |
618 | assert_eq_m256i(r, a); |
619 | let r = _mm256_mask_permutex2var_epi8(a, 0b11111111_11111111_11111111_11111111, idx, b); |
620 | #[rustfmt::skip] |
621 | let e = _mm256_set_epi8( |
622 | 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100, |
623 | 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100, |
624 | ); |
625 | assert_eq_m256i(r, e); |
626 | } |
627 | |
#[simd_test(enable = "avx512vbmi,avx512vl")]
629 | unsafe fn test_mm256_maskz_permutex2var_epi8() { |
630 | #[rustfmt::skip] |
631 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
632 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
633 | #[rustfmt::skip] |
634 | let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5, |
635 | 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5); |
636 | let b = _mm256_set1_epi8(100); |
637 | let r = _mm256_maskz_permutex2var_epi8(0, a, idx, b); |
638 | assert_eq_m256i(r, _mm256_setzero_si256()); |
639 | let r = _mm256_maskz_permutex2var_epi8(0b11111111_11111111_11111111_11111111, a, idx, b); |
640 | #[rustfmt::skip] |
641 | let e = _mm256_set_epi8( |
642 | 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100, |
643 | 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100, |
644 | ); |
645 | assert_eq_m256i(r, e); |
646 | } |
647 | |
#[simd_test(enable = "avx512vbmi,avx512vl")]
649 | unsafe fn test_mm256_mask2_permutex2var_epi8() { |
650 | #[rustfmt::skip] |
651 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
652 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
653 | #[rustfmt::skip] |
654 | let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5, |
655 | 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5); |
656 | let b = _mm256_set1_epi8(100); |
657 | let r = _mm256_mask2_permutex2var_epi8(a, idx, 0, b); |
658 | assert_eq_m256i(r, idx); |
659 | let r = _mm256_mask2_permutex2var_epi8(a, idx, 0b11111111_11111111_11111111_11111111, b); |
660 | #[rustfmt::skip] |
661 | let e = _mm256_set_epi8( |
662 | 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100, |
663 | 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100, |
664 | ); |
665 | assert_eq_m256i(r, e); |
666 | } |
667 | |
#[simd_test(enable = "avx512vbmi,avx512vl")]
669 | unsafe fn test_mm_permutex2var_epi8() { |
670 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
671 | #[rustfmt::skip] |
672 | let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4); |
673 | let b = _mm_set1_epi8(100); |
674 | let r = _mm_permutex2var_epi8(a, idx, b); |
675 | let e = _mm_set_epi8( |
676 | 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, |
677 | ); |
678 | assert_eq_m128i(r, e); |
679 | } |
680 | |
#[simd_test(enable = "avx512vbmi,avx512vl")]
682 | unsafe fn test_mm_mask_permutex2var_epi8() { |
683 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
684 | #[rustfmt::skip] |
685 | let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4); |
686 | let b = _mm_set1_epi8(100); |
687 | let r = _mm_mask_permutex2var_epi8(a, 0, idx, b); |
688 | assert_eq_m128i(r, a); |
689 | let r = _mm_mask_permutex2var_epi8(a, 0b11111111_11111111, idx, b); |
690 | let e = _mm_set_epi8( |
691 | 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, |
692 | ); |
693 | assert_eq_m128i(r, e); |
694 | } |
695 | |
#[simd_test(enable = "avx512vbmi,avx512vl")]
697 | unsafe fn test_mm_maskz_permutex2var_epi8() { |
698 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
699 | #[rustfmt::skip] |
700 | let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4); |
701 | let b = _mm_set1_epi8(100); |
702 | let r = _mm_maskz_permutex2var_epi8(0, a, idx, b); |
703 | assert_eq_m128i(r, _mm_setzero_si128()); |
704 | let r = _mm_maskz_permutex2var_epi8(0b11111111_11111111, a, idx, b); |
705 | let e = _mm_set_epi8( |
706 | 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, |
707 | ); |
708 | assert_eq_m128i(r, e); |
709 | } |
710 | |
#[simd_test(enable = "avx512vbmi,avx512vl")]
712 | unsafe fn test_mm_mask2_permutex2var_epi8() { |
713 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
714 | #[rustfmt::skip] |
715 | let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4); |
716 | let b = _mm_set1_epi8(100); |
717 | let r = _mm_mask2_permutex2var_epi8(a, idx, 0, b); |
718 | assert_eq_m128i(r, idx); |
719 | let r = _mm_mask2_permutex2var_epi8(a, idx, 0b11111111_11111111, b); |
720 | let e = _mm_set_epi8( |
721 | 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, |
722 | ); |
723 | assert_eq_m128i(r, e); |
724 | } |
725 | |
#[simd_test(enable = "avx512vbmi")]
727 | unsafe fn test_mm512_permutexvar_epi8() { |
728 | let idx = _mm512_set1_epi8(1); |
729 | #[rustfmt::skip] |
730 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
731 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
732 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
733 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
734 | let r = _mm512_permutexvar_epi8(idx, a); |
735 | let e = _mm512_set1_epi8(62); |
736 | assert_eq_m512i(r, e); |
737 | } |
738 | |
#[simd_test(enable = "avx512vbmi")]
740 | unsafe fn test_mm512_mask_permutexvar_epi8() { |
741 | let idx = _mm512_set1_epi8(1); |
742 | #[rustfmt::skip] |
743 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
744 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
745 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
746 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
747 | let r = _mm512_mask_permutexvar_epi8(a, 0, idx, a); |
748 | assert_eq_m512i(r, a); |
749 | let r = _mm512_mask_permutexvar_epi8( |
750 | a, |
751 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
752 | idx, |
753 | a, |
754 | ); |
755 | let e = _mm512_set1_epi8(62); |
756 | assert_eq_m512i(r, e); |
757 | } |
758 | |
#[simd_test(enable = "avx512vbmi")]
760 | unsafe fn test_mm512_maskz_permutexvar_epi8() { |
761 | let idx = _mm512_set1_epi8(1); |
762 | #[rustfmt::skip] |
763 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
764 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
765 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
766 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
767 | let r = _mm512_maskz_permutexvar_epi8(0, idx, a); |
768 | assert_eq_m512i(r, _mm512_setzero_si512()); |
769 | let r = _mm512_maskz_permutexvar_epi8( |
770 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
771 | idx, |
772 | a, |
773 | ); |
774 | let e = _mm512_set1_epi8(62); |
775 | assert_eq_m512i(r, e); |
776 | } |
777 | |
#[simd_test(enable = "avx512vbmi,avx512vl")]
779 | unsafe fn test_mm256_permutexvar_epi8() { |
780 | let idx = _mm256_set1_epi8(1); |
781 | #[rustfmt::skip] |
782 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
783 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
784 | let r = _mm256_permutexvar_epi8(idx, a); |
785 | let e = _mm256_set1_epi8(30); |
786 | assert_eq_m256i(r, e); |
787 | } |
788 | |
#[simd_test(enable = "avx512vbmi,avx512vl")]
790 | unsafe fn test_mm256_mask_permutexvar_epi8() { |
791 | let idx = _mm256_set1_epi8(1); |
792 | #[rustfmt::skip] |
793 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
794 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
795 | let r = _mm256_mask_permutexvar_epi8(a, 0, idx, a); |
796 | assert_eq_m256i(r, a); |
797 | let r = _mm256_mask_permutexvar_epi8(a, 0b11111111_11111111_11111111_11111111, idx, a); |
798 | let e = _mm256_set1_epi8(30); |
799 | assert_eq_m256i(r, e); |
800 | } |
801 | |
#[simd_test(enable = "avx512vbmi,avx512vl")]
803 | unsafe fn test_mm256_maskz_permutexvar_epi8() { |
804 | let idx = _mm256_set1_epi8(1); |
805 | #[rustfmt::skip] |
806 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
807 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
808 | let r = _mm256_maskz_permutexvar_epi8(0, idx, a); |
809 | assert_eq_m256i(r, _mm256_setzero_si256()); |
810 | let r = _mm256_maskz_permutexvar_epi8(0b11111111_11111111_11111111_11111111, idx, a); |
811 | let e = _mm256_set1_epi8(30); |
812 | assert_eq_m256i(r, e); |
813 | } |
814 | |
#[simd_test(enable = "avx512vbmi,avx512vl")]
816 | unsafe fn test_mm_permutexvar_epi8() { |
817 | let idx = _mm_set1_epi8(1); |
818 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
819 | let r = _mm_permutexvar_epi8(idx, a); |
820 | let e = _mm_set1_epi8(14); |
821 | assert_eq_m128i(r, e); |
822 | } |
823 | |
#[simd_test(enable = "avx512vbmi,avx512vl")]
825 | unsafe fn test_mm_mask_permutexvar_epi8() { |
826 | let idx = _mm_set1_epi8(1); |
827 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
828 | let r = _mm_mask_permutexvar_epi8(a, 0, idx, a); |
829 | assert_eq_m128i(r, a); |
830 | let r = _mm_mask_permutexvar_epi8(a, 0b11111111_11111111, idx, a); |
831 | let e = _mm_set1_epi8(14); |
832 | assert_eq_m128i(r, e); |
833 | } |
834 | |
#[simd_test(enable = "avx512vbmi,avx512vl")]
836 | unsafe fn test_mm_maskz_permutexvar_epi8() { |
837 | let idx = _mm_set1_epi8(1); |
838 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
839 | let r = _mm_maskz_permutexvar_epi8(0, idx, a); |
840 | assert_eq_m128i(r, _mm_setzero_si128()); |
841 | let r = _mm_maskz_permutexvar_epi8(0b11111111_11111111, idx, a); |
842 | let e = _mm_set1_epi8(14); |
843 | assert_eq_m128i(r, e); |
844 | } |
845 | |
#[simd_test(enable = "avx512vbmi")]
847 | unsafe fn test_mm512_multishift_epi64_epi8() { |
848 | let a = _mm512_set1_epi8(1); |
849 | let b = _mm512_set1_epi8(1); |
850 | let r = _mm512_multishift_epi64_epi8(a, b); |
851 | let e = _mm512_set1_epi8(1 << 7); |
852 | assert_eq_m512i(r, e); |
853 | } |
854 | |
#[simd_test(enable = "avx512vbmi")]
856 | unsafe fn test_mm512_mask_multishift_epi64_epi8() { |
857 | let a = _mm512_set1_epi8(1); |
858 | let b = _mm512_set1_epi8(1); |
859 | let r = _mm512_mask_multishift_epi64_epi8(a, 0, a, b); |
860 | assert_eq_m512i(r, a); |
861 | let r = _mm512_mask_multishift_epi64_epi8( |
862 | a, |
863 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
864 | a, |
865 | b, |
866 | ); |
867 | let e = _mm512_set1_epi8(1 << 7); |
868 | assert_eq_m512i(r, e); |
869 | } |
870 | |
#[simd_test(enable = "avx512vbmi")]
872 | unsafe fn test_mm512_maskz_multishift_epi64_epi8() { |
873 | let a = _mm512_set1_epi8(1); |
874 | let b = _mm512_set1_epi8(1); |
875 | let r = _mm512_maskz_multishift_epi64_epi8(0, a, b); |
876 | assert_eq_m512i(r, _mm512_setzero_si512()); |
877 | let r = _mm512_maskz_multishift_epi64_epi8( |
878 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
879 | a, |
880 | b, |
881 | ); |
882 | let e = _mm512_set1_epi8(1 << 7); |
883 | assert_eq_m512i(r, e); |
884 | } |
885 | |
#[simd_test(enable = "avx512vbmi,avx512vl")]
887 | unsafe fn test_mm256_multishift_epi64_epi8() { |
888 | let a = _mm256_set1_epi8(1); |
889 | let b = _mm256_set1_epi8(1); |
890 | let r = _mm256_multishift_epi64_epi8(a, b); |
891 | let e = _mm256_set1_epi8(1 << 7); |
892 | assert_eq_m256i(r, e); |
893 | } |
894 | |
#[simd_test(enable = "avx512vbmi,avx512vl")]
896 | unsafe fn test_mm256_mask_multishift_epi64_epi8() { |
897 | let a = _mm256_set1_epi8(1); |
898 | let b = _mm256_set1_epi8(1); |
899 | let r = _mm256_mask_multishift_epi64_epi8(a, 0, a, b); |
900 | assert_eq_m256i(r, a); |
901 | let r = _mm256_mask_multishift_epi64_epi8(a, 0b11111111_11111111_11111111_11111111, a, b); |
902 | let e = _mm256_set1_epi8(1 << 7); |
903 | assert_eq_m256i(r, e); |
904 | } |
905 | |
#[simd_test(enable = "avx512vbmi,avx512vl")]
907 | unsafe fn test_mm256_maskz_multishift_epi64_epi8() { |
908 | let a = _mm256_set1_epi8(1); |
909 | let b = _mm256_set1_epi8(1); |
910 | let r = _mm256_maskz_multishift_epi64_epi8(0, a, b); |
911 | assert_eq_m256i(r, _mm256_setzero_si256()); |
912 | let r = _mm256_maskz_multishift_epi64_epi8(0b11111111_11111111_11111111_11111111, a, b); |
913 | let e = _mm256_set1_epi8(1 << 7); |
914 | assert_eq_m256i(r, e); |
915 | } |
916 | |
#[simd_test(enable = "avx512vbmi,avx512vl")]
918 | unsafe fn test_mm_multishift_epi64_epi8() { |
919 | let a = _mm_set1_epi8(1); |
920 | let b = _mm_set1_epi8(1); |
921 | let r = _mm_multishift_epi64_epi8(a, b); |
922 | let e = _mm_set1_epi8(1 << 7); |
923 | assert_eq_m128i(r, e); |
924 | } |
925 | |
#[simd_test(enable = "avx512vbmi,avx512vl")]
927 | unsafe fn test_mm_mask_multishift_epi64_epi8() { |
928 | let a = _mm_set1_epi8(1); |
929 | let b = _mm_set1_epi8(1); |
930 | let r = _mm_mask_multishift_epi64_epi8(a, 0, a, b); |
931 | assert_eq_m128i(r, a); |
932 | let r = _mm_mask_multishift_epi64_epi8(a, 0b11111111_11111111, a, b); |
933 | let e = _mm_set1_epi8(1 << 7); |
934 | assert_eq_m128i(r, e); |
935 | } |
936 | |
#[simd_test(enable = "avx512vbmi,avx512vl")]
938 | unsafe fn test_mm_maskz_multishift_epi64_epi8() { |
939 | let a = _mm_set1_epi8(1); |
940 | let b = _mm_set1_epi8(1); |
941 | let r = _mm_maskz_multishift_epi64_epi8(0, a, b); |
942 | assert_eq_m128i(r, _mm_setzero_si128()); |
943 | let r = _mm_maskz_multishift_epi64_epi8(0b11111111_11111111, a, b); |
944 | let e = _mm_set1_epi8(1 << 7); |
945 | assert_eq_m128i(r, e); |
946 | } |
947 | } |
948 | |