1 | use crate::{ |
2 | arch::asm, |
3 | core_arch::{simd::*, x86::*}, |
4 | intrinsics::simd::*, |
5 | }; |
6 | |
#[cfg(test)]
8 | use stdarch_test::assert_instr; |
9 | |
10 | /// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
11 | /// |
12 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi16) |
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16 | pub unsafe fn _mm512_mask_expandloadu_epi16( |
17 | src: __m512i, |
18 | k: __mmask32, |
19 | mem_addr: *const i16, |
20 | ) -> __m512i { |
21 | let mut dst: __m512i = src; |
22 | asm!( |
23 | vpl!("vpexpandw {dst}{{{k}}}" ), |
24 | p = in(reg) mem_addr, |
25 | k = in(kreg) k, |
26 | dst = inout(zmm_reg) dst, |
27 | options(pure, readonly, nostack) |
28 | ); |
29 | dst |
30 | } |
31 | |
32 | /// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
33 | /// |
34 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi16) |
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38 | pub unsafe fn _mm512_maskz_expandloadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i { |
39 | let mut dst: __m512i; |
40 | asm!( |
41 | vpl!("vpexpandw {dst}{{{k}}} {{z}}" ), |
42 | p = in(reg) mem_addr, |
43 | k = in(kreg) k, |
44 | dst = out(zmm_reg) dst, |
45 | options(pure, readonly, nostack) |
46 | ); |
47 | dst |
48 | } |
49 | |
50 | /// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
51 | /// |
52 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi16) |
#[inline]
#[target_feature(enable = "avx512f,avx512vbmi2,avx512vl,avx")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
56 | pub unsafe fn _mm256_mask_expandloadu_epi16( |
57 | src: __m256i, |
58 | k: __mmask16, |
59 | mem_addr: *const i16, |
60 | ) -> __m256i { |
61 | let mut dst: __m256i = src; |
62 | asm!( |
63 | vpl!("vpexpandw {dst}{{{k}}}" ), |
64 | p = in(reg) mem_addr, |
65 | k = in(kreg) k, |
66 | dst = inout(ymm_reg) dst, |
67 | options(pure, readonly, nostack) |
68 | ); |
69 | dst |
70 | } |
71 | |
72 | /// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
73 | /// |
74 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi16) |
#[inline]
#[target_feature(enable = "avx512f,avx512vbmi2,avx512vl,avx")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
78 | pub unsafe fn _mm256_maskz_expandloadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i { |
79 | let mut dst: __m256i; |
80 | asm!( |
81 | vpl!("vpexpandw {dst}{{{k}}} {{z}}" ), |
82 | p = in(reg) mem_addr, |
83 | k = in(kreg) k, |
84 | dst = out(ymm_reg) dst, |
85 | options(pure, readonly, nostack) |
86 | ); |
87 | dst |
88 | } |
89 | |
90 | /// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
91 | /// |
92 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi16) |
#[inline]
#[target_feature(enable = "avx512f,avx512vbmi2,avx512vl,avx,sse")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
96 | pub unsafe fn _mm_mask_expandloadu_epi16( |
97 | src: __m128i, |
98 | k: __mmask8, |
99 | mem_addr: *const i16, |
100 | ) -> __m128i { |
101 | let mut dst: __m128i = src; |
102 | asm!( |
103 | vpl!("vpexpandw {dst}{{{k}}}" ), |
104 | p = in(reg) mem_addr, |
105 | k = in(kreg) k, |
106 | dst = inout(xmm_reg) dst, |
107 | options(pure, readonly, nostack) |
108 | ); |
109 | dst |
110 | } |
111 | |
112 | /// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
113 | /// |
114 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi16) |
#[inline]
#[target_feature(enable = "avx512f,avx512vbmi2,avx512vl,avx,sse")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
118 | pub unsafe fn _mm_maskz_expandloadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i { |
119 | let mut dst: __m128i; |
120 | asm!( |
121 | vpl!("vpexpandw {dst}{{{k}}} {{z}}" ), |
122 | p = in(reg) mem_addr, |
123 | k = in(kreg) k, |
124 | dst = out(xmm_reg) dst, |
125 | options(pure, readonly, nostack) |
126 | ); |
127 | dst |
128 | } |
129 | |
130 | /// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
131 | /// |
132 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi8) |
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
136 | pub unsafe fn _mm512_mask_expandloadu_epi8( |
137 | src: __m512i, |
138 | k: __mmask64, |
139 | mem_addr: *const i8, |
140 | ) -> __m512i { |
141 | let mut dst: __m512i = src; |
142 | asm!( |
143 | vpl!("vpexpandb {dst}{{{k}}}" ), |
144 | p = in(reg) mem_addr, |
145 | k = in(kreg) k, |
146 | dst = inout(zmm_reg) dst, |
147 | options(pure, readonly, nostack) |
148 | ); |
149 | dst |
150 | } |
151 | |
152 | /// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
153 | /// |
154 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi8) |
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
158 | pub unsafe fn _mm512_maskz_expandloadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i { |
159 | let mut dst: __m512i; |
160 | asm!( |
161 | vpl!("vpexpandb {dst}{{{k}}} {{z}}" ), |
162 | p = in(reg) mem_addr, |
163 | k = in(kreg) k, |
164 | dst = out(zmm_reg) dst, |
165 | options(pure, readonly, nostack) |
166 | ); |
167 | dst |
168 | } |
169 | |
170 | /// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
171 | /// |
172 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi8) |
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2,avx512vl,avx")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
176 | pub unsafe fn _mm256_mask_expandloadu_epi8( |
177 | src: __m256i, |
178 | k: __mmask32, |
179 | mem_addr: *const i8, |
180 | ) -> __m256i { |
181 | let mut dst: __m256i = src; |
182 | asm!( |
183 | vpl!("vpexpandb {dst}{{{k}}}" ), |
184 | p = in(reg) mem_addr, |
185 | k = in(kreg) k, |
186 | dst = inout(ymm_reg) dst, |
187 | options(pure, readonly, nostack) |
188 | ); |
189 | dst |
190 | } |
191 | |
192 | /// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
193 | /// |
194 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi8) |
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2,avx512vl,avx")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
198 | pub unsafe fn _mm256_maskz_expandloadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i { |
199 | let mut dst: __m256i; |
200 | asm!( |
201 | vpl!("vpexpandb {dst}{{{k}}} {{z}}" ), |
202 | p = in(reg) mem_addr, |
203 | k = in(kreg) k, |
204 | dst = out(ymm_reg) dst, |
205 | options(pure, readonly, nostack) |
206 | ); |
207 | dst |
208 | } |
209 | |
210 | /// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
211 | /// |
212 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi8) |
#[inline]
#[target_feature(enable = "avx512f,avx512vbmi2,avx512vl,avx,sse")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
216 | pub unsafe fn _mm_mask_expandloadu_epi8( |
217 | src: __m128i, |
218 | k: __mmask16, |
219 | mem_addr: *const i8, |
220 | ) -> __m128i { |
221 | let mut dst: __m128i = src; |
222 | asm!( |
223 | vpl!("vpexpandb {dst}{{{k}}}" ), |
224 | p = in(reg) mem_addr, |
225 | k = in(kreg) k, |
226 | dst = inout(xmm_reg) dst, |
227 | options(pure, readonly, nostack) |
228 | ); |
229 | dst |
230 | } |
231 | |
232 | /// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
233 | /// |
234 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi8) |
#[inline]
#[target_feature(enable = "avx512f,avx512vbmi2,avx512vl,avx,sse")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
238 | pub unsafe fn _mm_maskz_expandloadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i { |
239 | let mut dst: __m128i; |
240 | asm!( |
241 | vpl!("vpexpandb {dst}{{{k}}} {{z}}" ), |
242 | p = in(reg) mem_addr, |
243 | k = in(kreg) k, |
244 | dst = out(xmm_reg) dst, |
245 | options(pure, readonly, nostack) |
246 | ); |
247 | dst |
248 | } |
249 | |
250 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
251 | /// |
252 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi16) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm512_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask32, a: __m512i) {
    vcompressstorew(base_addr as *mut _, a.as_i16x32(), k)
}
260 | |
261 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
262 | /// |
263 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi16) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm256_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask16, a: __m256i) {
    vcompressstorew256(base_addr as *mut _, a.as_i16x16(), k)
}
271 | |
272 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
273 | /// |
274 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi16) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask8, a: __m128i) {
    vcompressstorew128(base_addr as *mut _, a.as_i16x8(), k)
}
282 | |
283 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
284 | /// |
285 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi8) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm512_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask64, a: __m512i) {
    vcompressstoreb(base_addr as *mut _, a.as_i8x64(), k)
}
293 | |
294 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
295 | /// |
296 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi8) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm256_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask32, a: __m256i) {
    vcompressstoreb256(base_addr as *mut _, a.as_i8x32(), k)
}
304 | |
305 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
306 | /// |
307 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi8) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask16, a: __m128i) {
    vcompressstoreb128(base_addr as *mut _, a.as_i8x16(), k)
}
315 | |
316 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. |
317 | /// |
318 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi16&expand=1192) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm512_mask_compress_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    transmute(vpcompressw(a.as_i16x32(), src.as_i16x32(), k))
}
326 | |
327 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. |
328 | /// |
329 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi16&expand=1193) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm512_maskz_compress_epi16(k: __mmask32, a: __m512i) -> __m512i {
    transmute(vpcompressw(
        a.as_i16x32(),
        _mm512_setzero_si512().as_i16x32(),
        k,
    ))
}
341 | |
342 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. |
343 | /// |
344 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi16&expand=1190) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm256_mask_compress_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    transmute(vpcompressw256(a.as_i16x16(), src.as_i16x16(), k))
}
352 | |
353 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. |
354 | /// |
355 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi16&expand=1191) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm256_maskz_compress_epi16(k: __mmask16, a: __m256i) -> __m256i {
    transmute(vpcompressw256(
        a.as_i16x16(),
        _mm256_setzero_si256().as_i16x16(),
        k,
    ))
}
367 | |
368 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. |
369 | /// |
370 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi16&expand=1188) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm_mask_compress_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpcompressw128(a.as_i16x8(), src.as_i16x8(), k))
}
378 | |
379 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. |
380 | /// |
381 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi16&expand=1189) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm_maskz_compress_epi16(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpcompressw128(
        a.as_i16x8(),
        _mm_setzero_si128().as_i16x8(),
        k,
    ))
}
393 | |
394 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. |
395 | /// |
396 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi8&expand=1210) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm512_mask_compress_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    transmute(vpcompressb(a.as_i8x64(), src.as_i8x64(), k))
}
404 | |
405 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. |
406 | /// |
407 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi8&expand=1211) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm512_maskz_compress_epi8(k: __mmask64, a: __m512i) -> __m512i {
    transmute(vpcompressb(
        a.as_i8x64(),
        _mm512_setzero_si512().as_i8x64(),
        k,
    ))
}
419 | |
420 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. |
421 | /// |
422 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi8&expand=1208) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm256_mask_compress_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    transmute(vpcompressb256(a.as_i8x32(), src.as_i8x32(), k))
}
430 | |
431 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. |
432 | /// |
433 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi8&expand=1209) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm256_maskz_compress_epi8(k: __mmask32, a: __m256i) -> __m256i {
    transmute(vpcompressb256(
        a.as_i8x32(),
        _mm256_setzero_si256().as_i8x32(),
        k,
    ))
}
445 | |
446 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. |
447 | /// |
448 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi8&expand=1206) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm_mask_compress_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    transmute(vpcompressb128(a.as_i8x16(), src.as_i8x16(), k))
}
456 | |
457 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. |
458 | /// |
459 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi8&expand=1207) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm_maskz_compress_epi8(k: __mmask16, a: __m128i) -> __m128i {
    transmute(vpcompressb128(
        a.as_i8x16(),
        _mm_setzero_si128().as_i8x16(),
        k,
    ))
}
471 | |
472 | /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
473 | /// |
474 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi16&expand=2310) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub unsafe fn _mm512_mask_expand_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    transmute(vpexpandw(a.as_i16x32(), src.as_i16x32(), k))
}
482 | |
483 | /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
484 | /// |
485 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi16&expand=2311) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub unsafe fn _mm512_maskz_expand_epi16(k: __mmask32, a: __m512i) -> __m512i {
    transmute(vpexpandw(
        a.as_i16x32(),
        _mm512_setzero_si512().as_i16x32(),
        k,
    ))
}
497 | |
498 | /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
499 | /// |
500 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi16&expand=2308) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub unsafe fn _mm256_mask_expand_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    transmute(vpexpandw256(a.as_i16x16(), src.as_i16x16(), k))
}
508 | |
509 | /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
510 | /// |
511 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi16&expand=2309) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub unsafe fn _mm256_maskz_expand_epi16(k: __mmask16, a: __m256i) -> __m256i {
    transmute(vpexpandw256(
        a.as_i16x16(),
        _mm256_setzero_si256().as_i16x16(),
        k,
    ))
}
523 | |
524 | /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
525 | /// |
526 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi16&expand=2306) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub unsafe fn _mm_mask_expand_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpexpandw128(a.as_i16x8(), src.as_i16x8(), k))
}
534 | |
535 | /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
536 | /// |
537 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi16&expand=2307) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub unsafe fn _mm_maskz_expand_epi16(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpexpandw128(
        a.as_i16x8(),
        _mm_setzero_si128().as_i16x8(),
        k,
    ))
}
549 | |
550 | /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
551 | /// |
552 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi8&expand=2328) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub unsafe fn _mm512_mask_expand_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    transmute(vpexpandb(a.as_i8x64(), src.as_i8x64(), k))
}
560 | |
561 | /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
562 | /// |
563 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi8&expand=2329) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub unsafe fn _mm512_maskz_expand_epi8(k: __mmask64, a: __m512i) -> __m512i {
    transmute(vpexpandb(
        a.as_i8x64(),
        _mm512_setzero_si512().as_i8x64(),
        k,
    ))
}
575 | |
576 | /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
577 | /// |
578 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi8&expand=2326) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub unsafe fn _mm256_mask_expand_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    transmute(vpexpandb256(a.as_i8x32(), src.as_i8x32(), k))
}
586 | |
587 | /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
588 | /// |
589 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi8&expand=2327) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub unsafe fn _mm256_maskz_expand_epi8(k: __mmask32, a: __m256i) -> __m256i {
    transmute(vpexpandb256(
        a.as_i8x32(),
        _mm256_setzero_si256().as_i8x32(),
        k,
    ))
}
601 | |
602 | /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
603 | /// |
604 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi8&expand=2324) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub unsafe fn _mm_mask_expand_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    transmute(vpexpandb128(a.as_i8x16(), src.as_i8x16(), k))
}
612 | |
613 | /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
614 | /// |
615 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi8&expand=2325) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub unsafe fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i {
    transmute(vpexpandb128(
        a.as_i8x16(),
        _mm_setzero_si128().as_i8x16(),
        k,
    ))
}
627 | |
628 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst. |
629 | /// |
630 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi64&expand=5087) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    transmute(vpshldvq(a.as_i64x8(), b.as_i64x8(), c.as_i64x8()))
}
638 | |
639 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
640 | /// |
641 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi64&expand=5085) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm512_mask_shldv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
    let shf: i64x8 = _mm512_shldv_epi64(a, b, c).as_i64x8();
    transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
}
650 | |
651 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
652 | /// |
653 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi64&expand=5086) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    let shf: i64x8 = _mm512_shldv_epi64(a, b, c).as_i64x8();
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, shf, zero))
}
663 | |
664 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst. |
665 | /// |
666 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi64&expand=5084) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    transmute(vpshldvq256(a.as_i64x4(), b.as_i64x4(), c.as_i64x4()))
}
674 | |
675 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
676 | /// |
677 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi64&expand=5082) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm256_mask_shldv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    let shf: i64x4 = _mm256_shldv_epi64(a, b, c).as_i64x4();
    transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
}
686 | |
687 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
688 | /// |
689 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi64&expand=5083) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    let shf: i64x4 = _mm256_shldv_epi64(a, b, c).as_i64x4();
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, shf, zero))
}
699 | |
700 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst. |
701 | /// |
702 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi64&expand=5081) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    transmute(vpshldvq128(a.as_i64x2(), b.as_i64x2(), c.as_i64x2()))
}
710 | |
711 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
712 | /// |
713 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi64&expand=5079) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm_mask_shldv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    let shf: i64x2 = _mm_shldv_epi64(a, b, c).as_i64x2();
    transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
}
722 | |
723 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
724 | /// |
725 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi64&expand=5080) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    let shf: i64x2 = _mm_shldv_epi64(a, b, c).as_i64x2();
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, shf, zero))
}
735 | |
736 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst. |
737 | /// |
738 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi32&expand=5078) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    transmute(vpshldvd(a.as_i32x16(), b.as_i32x16(), c.as_i32x16()))
}
746 | |
747 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
748 | /// |
749 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi32&expand=5076) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm512_mask_shldv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
    let shf: i32x16 = _mm512_shldv_epi32(a, b, c).as_i32x16();
    transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
}
758 | |
759 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
760 | /// |
761 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi32&expand=5077) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm512_maskz_shldv_epi32(
    k: __mmask16,
    a: __m512i,
    b: __m512i,
    c: __m512i,
) -> __m512i {
    let shf: i32x16 = _mm512_shldv_epi32(a, b, c).as_i32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, shf, zero))
}
776 | |
777 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst. |
778 | /// |
779 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi32&expand=5075) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    transmute(vpshldvd256(a.as_i32x8(), b.as_i32x8(), c.as_i32x8()))
}
787 | |
788 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
789 | /// |
790 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi32&expand=5073) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm256_mask_shldv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    let shf: i32x8 = _mm256_shldv_epi32(a, b, c).as_i32x8();
    transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
}
799 | |
800 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
801 | /// |
802 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi32&expand=5074) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    let shf: i32x8 = _mm256_shldv_epi32(a, b, c).as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, shf, zero))
}
812 | |
813 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst. |
814 | /// |
815 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi32&expand=5072) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    transmute(vpshldvd128(a.as_i32x4(), b.as_i32x4(), c.as_i32x4()))
}
823 | |
824 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
825 | /// |
826 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi32&expand=5070) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm_mask_shldv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    let shf: i32x4 = _mm_shldv_epi32(a, b, c).as_i32x4();
    transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
}
835 | |
836 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
837 | /// |
838 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi32&expand=5071) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    let shf: i32x4 = _mm_shldv_epi32(a, b, c).as_i32x4();
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, shf, zero))
}
848 | |
849 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst. |
850 | /// |
851 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi16&expand=5069) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    transmute(vpshldvw(a.as_i16x32(), b.as_i16x32(), c.as_i16x32()))
}
859 | |
860 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
861 | /// |
862 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi16&expand=5067) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm512_mask_shldv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
    let shf: i16x32 = _mm512_shldv_epi16(a, b, c).as_i16x32();
    transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
}
871 | |
872 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
873 | /// |
874 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi16&expand=5068) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm512_maskz_shldv_epi16(
    k: __mmask32,
    a: __m512i,
    b: __m512i,
    c: __m512i,
) -> __m512i {
    let shf: i16x32 = _mm512_shldv_epi16(a, b, c).as_i16x32();
    let zero: i16x32 = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, shf, zero))
}
889 | |
890 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst. |
891 | /// |
892 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi16&expand=5066) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    transmute(vpshldvw256(a.as_i16x16(), b.as_i16x16(), c.as_i16x16()))
}
900 | |
901 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
902 | /// |
903 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi16&expand=5064) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm256_mask_shldv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
    let shf: i16x16 = _mm256_shldv_epi16(a, b, c).as_i16x16();
    transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
}
912 | |
913 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
914 | /// |
915 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi16&expand=5065) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm256_maskz_shldv_epi16(
    k: __mmask16,
    a: __m256i,
    b: __m256i,
    c: __m256i,
) -> __m256i {
    let shf: i16x16 = _mm256_shldv_epi16(a, b, c).as_i16x16();
    let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, shf, zero))
}
930 | |
931 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst. |
932 | /// |
933 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi16&expand=5063) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    transmute(vpshldvw128(a.as_i16x8(), b.as_i16x8(), c.as_i16x8()))
}
941 | |
942 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
943 | /// |
944 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi16&expand=5061) |
945 | #[inline ] |
946 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
947 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
948 | #[cfg_attr (test, assert_instr(vpshldvw))] |
949 | pub unsafe fn _mm_mask_shldv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { |
950 | let shf: i16x8 = _mm_shldv_epi16(a, b, c).as_i16x8(); |
    transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
952 | } |
953 | |
954 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
955 | /// |
956 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi16&expand=5062) |
957 | #[inline ] |
958 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
959 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
960 | #[cfg_attr (test, assert_instr(vpshldvw))] |
961 | pub unsafe fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
962 | let shf: i16x8 = _mm_shldv_epi16(a, b, c).as_i16x8(); |
963 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
    transmute(simd_select_bitmask(k, shf, zero))
965 | } |
966 | |
967 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst. |
968 | /// |
969 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi64&expand=5141) |
970 | #[inline ] |
971 | #[target_feature (enable = "avx512vbmi2" )] |
972 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
973 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
974 | pub unsafe fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
    transmute(vpshrdvq(a.as_i64x8(), b.as_i64x8(), c.as_i64x8()))
976 | } |
977 | |
978 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
979 | /// |
980 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi64&expand=5139) |
981 | #[inline ] |
982 | #[target_feature (enable = "avx512vbmi2" )] |
983 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
984 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
985 | pub unsafe fn _mm512_mask_shrdv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i { |
986 | let shf: i64x8 = _mm512_shrdv_epi64(a, b, c).as_i64x8(); |
    transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
988 | } |
989 | |
990 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
991 | /// |
992 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi64&expand=5140) |
993 | #[inline ] |
994 | #[target_feature (enable = "avx512vbmi2" )] |
995 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
996 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
997 | pub unsafe fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
998 | let shf: i64x8 = _mm512_shrdv_epi64(a, b, c).as_i64x8(); |
999 | let zero: i64x8 = _mm512_setzero_si512().as_i64x8(); |
    transmute(simd_select_bitmask(k, shf, zero))
1001 | } |
1002 | |
1003 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst. |
1004 | /// |
1005 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi64&expand=5138) |
1006 | #[inline ] |
1007 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1008 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1009 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
1010 | pub unsafe fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
    transmute(vpshrdvq256(a.as_i64x4(), b.as_i64x4(), c.as_i64x4()))
1012 | } |
1013 | |
1014 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
1015 | /// |
1016 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi64&expand=5136) |
1017 | #[inline ] |
1018 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1019 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1020 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
1021 | pub unsafe fn _mm256_mask_shrdv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { |
1022 | let shf: i64x4 = _mm256_shrdv_epi64(a, b, c).as_i64x4(); |
    transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
1024 | } |
1025 | |
1026 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1027 | /// |
1028 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi64&expand=5137) |
1029 | #[inline ] |
1030 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1031 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1032 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
1033 | pub unsafe fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
1034 | let shf: i64x4 = _mm256_shrdv_epi64(a, b, c).as_i64x4(); |
1035 | let zero: i64x4 = _mm256_setzero_si256().as_i64x4(); |
    transmute(simd_select_bitmask(k, shf, zero))
1037 | } |
1038 | |
1039 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst. |
1040 | /// |
1041 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi64&expand=5135) |
1042 | #[inline ] |
1043 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1044 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1045 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
1046 | pub unsafe fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
    transmute(vpshrdvq128(a.as_i64x2(), b.as_i64x2(), c.as_i64x2()))
1048 | } |
1049 | |
1050 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
1051 | /// |
1052 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi64&expand=5133) |
1053 | #[inline ] |
1054 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1055 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1056 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
1057 | pub unsafe fn _mm_mask_shrdv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { |
1058 | let shf: i64x2 = _mm_shrdv_epi64(a, b, c).as_i64x2(); |
    transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
1060 | } |
1061 | |
1062 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1063 | /// |
1064 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi64&expand=5134) |
1065 | #[inline ] |
1066 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1067 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1068 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
1069 | pub unsafe fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
1070 | let shf: i64x2 = _mm_shrdv_epi64(a, b, c).as_i64x2(); |
1071 | let zero: i64x2 = _mm_setzero_si128().as_i64x2(); |
    transmute(simd_select_bitmask(k, shf, zero))
1073 | } |
1074 | |
1075 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst. |
1076 | /// |
1077 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi32&expand=5132) |
1078 | #[inline ] |
1079 | #[target_feature (enable = "avx512vbmi2" )] |
1080 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1081 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
1082 | pub unsafe fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
    transmute(vpshrdvd(a.as_i32x16(), b.as_i32x16(), c.as_i32x16()))
1084 | } |
1085 | |
1086 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
1087 | /// |
1088 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi32&expand=5130) |
1089 | #[inline ] |
1090 | #[target_feature (enable = "avx512vbmi2" )] |
1091 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1092 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
1093 | pub unsafe fn _mm512_mask_shrdv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i { |
1094 | let shf: i32x16 = _mm512_shrdv_epi32(a, b, c).as_i32x16(); |
    transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
1096 | } |
1097 | |
1098 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1099 | /// |
1100 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi32&expand=5131) |
1101 | #[inline ] |
1102 | #[target_feature (enable = "avx512vbmi2" )] |
1103 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1104 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
1105 | pub unsafe fn _mm512_maskz_shrdv_epi32( |
1106 | k: __mmask16, |
1107 | a: __m512i, |
1108 | b: __m512i, |
1109 | c: __m512i, |
1110 | ) -> __m512i { |
1111 | let shf: i32x16 = _mm512_shrdv_epi32(a, b, c).as_i32x16(); |
1112 | let zero: i32x16 = _mm512_setzero_si512().as_i32x16(); |
    transmute(simd_select_bitmask(k, shf, zero))
1114 | } |
1115 | |
1116 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst. |
1117 | /// |
1118 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi32&expand=5129) |
1119 | #[inline ] |
1120 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1121 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1122 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
1123 | pub unsafe fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
    transmute(vpshrdvd256(a.as_i32x8(), b.as_i32x8(), c.as_i32x8()))
1125 | } |
1126 | |
1127 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
1128 | /// |
1129 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi32&expand=5127) |
1130 | #[inline ] |
1131 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1132 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1133 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
1134 | pub unsafe fn _mm256_mask_shrdv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { |
1135 | let shf: i32x8 = _mm256_shrdv_epi32(a, b, c).as_i32x8(); |
    transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
1137 | } |
1138 | |
1139 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1140 | /// |
1141 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi32&expand=5128) |
1142 | #[inline ] |
1143 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1144 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1145 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
1146 | pub unsafe fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
1147 | let shf: i32x8 = _mm256_shrdv_epi32(a, b, c).as_i32x8(); |
1148 | let zero: i32x8 = _mm256_setzero_si256().as_i32x8(); |
    transmute(simd_select_bitmask(k, shf, zero))
1150 | } |
1151 | |
1152 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst. |
1153 | /// |
1154 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi32&expand=5126) |
1155 | #[inline ] |
1156 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1157 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1158 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
1159 | pub unsafe fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
    transmute(vpshrdvd128(a.as_i32x4(), b.as_i32x4(), c.as_i32x4()))
1161 | } |
1162 | |
1163 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
1164 | /// |
1165 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi32&expand=5124) |
1166 | #[inline ] |
1167 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1168 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1169 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
1170 | pub unsafe fn _mm_mask_shrdv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { |
1171 | let shf: i32x4 = _mm_shrdv_epi32(a, b, c).as_i32x4(); |
    transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
1173 | } |
1174 | |
1175 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1176 | /// |
1177 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi32&expand=5125) |
1178 | #[inline ] |
1179 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1180 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1181 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
1182 | pub unsafe fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
1183 | let shf: i32x4 = _mm_shrdv_epi32(a, b, c).as_i32x4(); |
1184 | let zero: i32x4 = _mm_setzero_si128().as_i32x4(); |
    transmute(simd_select_bitmask(k, shf, zero))
1186 | } |
1187 | |
1188 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst. |
1189 | /// |
1190 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi16&expand=5123) |
1191 | #[inline ] |
1192 | #[target_feature (enable = "avx512vbmi2" )] |
1193 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1194 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
1195 | pub unsafe fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
    transmute(vpshrdvw(a.as_i16x32(), b.as_i16x32(), c.as_i16x32()))
1197 | } |
1198 | |
1199 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
1200 | /// |
1201 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi16&expand=5121) |
1202 | #[inline ] |
1203 | #[target_feature (enable = "avx512vbmi2" )] |
1204 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1205 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
1206 | pub unsafe fn _mm512_mask_shrdv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i { |
1207 | let shf: i16x32 = _mm512_shrdv_epi16(a, b, c).as_i16x32(); |
    transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
1209 | } |
1210 | |
1211 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1212 | /// |
1213 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi16&expand=5122) |
1214 | #[inline ] |
1215 | #[target_feature (enable = "avx512vbmi2" )] |
1216 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1217 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
1218 | pub unsafe fn _mm512_maskz_shrdv_epi16( |
1219 | k: __mmask32, |
1220 | a: __m512i, |
1221 | b: __m512i, |
1222 | c: __m512i, |
1223 | ) -> __m512i { |
1224 | let shf: i16x32 = _mm512_shrdv_epi16(a, b, c).as_i16x32(); |
1225 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
    transmute(simd_select_bitmask(k, shf, zero))
1227 | } |
1228 | |
1229 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst. |
1230 | /// |
1231 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi16&expand=5120) |
1232 | #[inline ] |
1233 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1234 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1235 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
1236 | pub unsafe fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
    transmute(vpshrdvw256(a.as_i16x16(), b.as_i16x16(), c.as_i16x16()))
1238 | } |
1239 | |
1240 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
1241 | /// |
1242 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi16&expand=5118) |
1243 | #[inline ] |
1244 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1245 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1246 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
1247 | pub unsafe fn _mm256_mask_shrdv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i { |
1248 | let shf: i16x16 = _mm256_shrdv_epi16(a, b, c).as_i16x16(); |
    transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
1250 | } |
1251 | |
1252 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1253 | /// |
1254 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi16&expand=5119) |
1255 | #[inline ] |
1256 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1257 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1258 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
1259 | pub unsafe fn _mm256_maskz_shrdv_epi16( |
1260 | k: __mmask16, |
1261 | a: __m256i, |
1262 | b: __m256i, |
1263 | c: __m256i, |
1264 | ) -> __m256i { |
1265 | let shf: i16x16 = _mm256_shrdv_epi16(a, b, c).as_i16x16(); |
1266 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
    transmute(simd_select_bitmask(k, shf, zero))
1268 | } |
1269 | |
1270 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst. |
1271 | /// |
1272 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi16&expand=5117) |
1273 | #[inline ] |
1274 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1275 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1276 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
1277 | pub unsafe fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
    transmute(vpshrdvw128(a.as_i16x8(), b.as_i16x8(), c.as_i16x8()))
1279 | } |
1280 | |
1281 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
1282 | /// |
1283 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi16&expand=5115) |
1284 | #[inline ] |
1285 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1286 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1287 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
1288 | pub unsafe fn _mm_mask_shrdv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { |
1289 | let shf: i16x8 = _mm_shrdv_epi16(a, b, c).as_i16x8(); |
    transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
1291 | } |
1292 | |
1293 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1294 | /// |
1295 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi16&expand=5116) |
1296 | #[inline ] |
1297 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1298 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1299 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
1300 | pub unsafe fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
1301 | let shf: i16x8 = _mm_shrdv_epi16(a, b, c).as_i16x8(); |
1302 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
    transmute(simd_select_bitmask(k, shf, zero))
1304 | } |
1305 | |
/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
1307 | /// |
1308 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi64&expand=5060) |
1309 | #[inline ] |
1310 | #[target_feature (enable = "avx512vbmi2" )] |
1311 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1312 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
1313 | #[rustc_legacy_const_generics (2)] |
1314 | pub unsafe fn _mm512_shldi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { |
1315 | static_assert_uimm_bits!(IMM8, 8); |
1316 | let imm8: i64 = IMM8 as i64; |
    transmute(vpshldvq(
        a.as_i64x8(),
        b.as_i64x8(),
        _mm512_set1_epi64(imm8).as_i64x8(),
    ))
1322 | } |
1323 | |
1324 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1325 | /// |
1326 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi64&expand=5058) |
1327 | #[inline ] |
1328 | #[target_feature (enable = "avx512vbmi2" )] |
1329 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1330 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
1331 | #[rustc_legacy_const_generics (4)] |
1332 | pub unsafe fn _mm512_mask_shldi_epi64<const IMM8: i32>( |
1333 | src: __m512i, |
1334 | k: __mmask8, |
1335 | a: __m512i, |
1336 | b: __m512i, |
1337 | ) -> __m512i { |
1338 | static_assert_uimm_bits!(IMM8, 8); |
1339 | let imm8: i64 = IMM8 as i64; |
    let shf: i64x8 = vpshldvq(
        a.as_i64x8(),
        b.as_i64x8(),
        _mm512_set1_epi64(imm8).as_i64x8(),
    );
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
1346 | } |
1347 | |
1348 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1349 | /// |
1350 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi64&expand=5059) |
1351 | #[inline ] |
1352 | #[target_feature (enable = "avx512vbmi2" )] |
1353 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1354 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
1355 | #[rustc_legacy_const_generics (3)] |
1356 | pub unsafe fn _mm512_maskz_shldi_epi64<const IMM8: i32>( |
1357 | k: __mmask8, |
1358 | a: __m512i, |
1359 | b: __m512i, |
1360 | ) -> __m512i { |
1361 | static_assert_uimm_bits!(IMM8, 8); |
1362 | let imm8: i64 = IMM8 as i64; |
    let shf: i64x8 = vpshldvq(
        a.as_i64x8(),
        b.as_i64x8(),
        _mm512_set1_epi64(imm8).as_i64x8(),
    );
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, shf, zero))
1370 | } |
1371 | |
/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
1373 | /// |
1374 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi64&expand=5057) |
1375 | #[inline ] |
1376 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1377 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1378 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
1379 | #[rustc_legacy_const_generics (2)] |
1380 | pub unsafe fn _mm256_shldi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
1381 | static_assert_uimm_bits!(IMM8, 8); |
1382 | let imm8: i64 = IMM8 as i64; |
    transmute(vpshldvq256(
        a.as_i64x4(),
        b.as_i64x4(),
        _mm256_set1_epi64x(imm8).as_i64x4(),
    ))
1388 | } |
1389 | |
1390 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1391 | /// |
1392 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi64&expand=5055) |
1393 | #[inline ] |
1394 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1395 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1396 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
1397 | #[rustc_legacy_const_generics (4)] |
1398 | pub unsafe fn _mm256_mask_shldi_epi64<const IMM8: i32>( |
1399 | src: __m256i, |
1400 | k: __mmask8, |
1401 | a: __m256i, |
1402 | b: __m256i, |
1403 | ) -> __m256i { |
1404 | static_assert_uimm_bits!(IMM8, 8); |
1405 | let imm8: i64 = IMM8 as i64; |
    let shf: i64x4 = vpshldvq256(
        a.as_i64x4(),
        b.as_i64x4(),
        _mm256_set1_epi64x(imm8).as_i64x4(),
    );
    transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
1412 | } |
1413 | |
1414 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1415 | /// |
1416 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi64&expand=5056) |
1417 | #[inline ] |
1418 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1419 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1420 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
1421 | #[rustc_legacy_const_generics (3)] |
1422 | pub unsafe fn _mm256_maskz_shldi_epi64<const IMM8: i32>( |
1423 | k: __mmask8, |
1424 | a: __m256i, |
1425 | b: __m256i, |
1426 | ) -> __m256i { |
1427 | static_assert_uimm_bits!(IMM8, 8); |
1428 | let imm8: i64 = IMM8 as i64; |
    let shf: i64x4 = vpshldvq256(
        a.as_i64x4(),
        b.as_i64x4(),
        _mm256_set1_epi64x(imm8).as_i64x4(),
    );
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, shf, zero))
1436 | } |
1437 | |
/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
1439 | /// |
1440 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi64&expand=5054) |
1441 | #[inline ] |
1442 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1443 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1444 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
1445 | #[rustc_legacy_const_generics (2)] |
1446 | pub unsafe fn _mm_shldi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
1447 | static_assert_uimm_bits!(IMM8, 8); |
1448 | let imm8: i64 = IMM8 as i64; |
    transmute(vpshldvq128(
        a.as_i64x2(),
        b.as_i64x2(),
        _mm_set1_epi64x(imm8).as_i64x2(),
    ))
1454 | } |
1455 | |
1456 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1457 | /// |
1458 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi64&expand=5052) |
1459 | #[inline ] |
1460 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1461 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1462 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
1463 | #[rustc_legacy_const_generics (4)] |
1464 | pub unsafe fn _mm_mask_shldi_epi64<const IMM8: i32>( |
1465 | src: __m128i, |
1466 | k: __mmask8, |
1467 | a: __m128i, |
1468 | b: __m128i, |
1469 | ) -> __m128i { |
1470 | static_assert_uimm_bits!(IMM8, 8); |
1471 | let imm8: i64 = IMM8 as i64; |
    let shf: i64x2 = vpshldvq128(a.as_i64x2(), b.as_i64x2(), _mm_set1_epi64x(imm8).as_i64x2());
    transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
1474 | } |
1475 | |
1476 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1477 | /// |
1478 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi64&expand=5053) |
1479 | #[inline ] |
1480 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1481 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1482 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
1483 | #[rustc_legacy_const_generics (3)] |
1484 | pub unsafe fn _mm_maskz_shldi_epi64<const IMM8: i32>( |
1485 | k: __mmask8, |
1486 | a: __m128i, |
1487 | b: __m128i, |
1488 | ) -> __m128i { |
1489 | static_assert_uimm_bits!(IMM8, 8); |
1490 | let imm8: i64 = IMM8 as i64; |
    let shf: i64x2 = vpshldvq128(a.as_i64x2(), b.as_i64x2(), _mm_set1_epi64x(imm8).as_i64x2());
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, shf, zero))
1494 | } |
1495 | |
1496 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst. |
1497 | /// |
1498 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi32&expand=5051) |
1499 | #[inline ] |
1500 | #[target_feature (enable = "avx512vbmi2" )] |
1501 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1502 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
1503 | #[rustc_legacy_const_generics (2)] |
1504 | pub unsafe fn _mm512_shldi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { |
1505 | static_assert_uimm_bits!(IMM8, 8); |
    transmute(vpshldvd(
        a.as_i32x16(),
        b.as_i32x16(),
        _mm512_set1_epi32(IMM8).as_i32x16(),
    ))
1511 | } |
1512 | |
1513 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1514 | /// |
1515 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi32&expand=5049) |
1516 | #[inline ] |
1517 | #[target_feature (enable = "avx512vbmi2" )] |
1518 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1519 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
1520 | #[rustc_legacy_const_generics (4)] |
1521 | pub unsafe fn _mm512_mask_shldi_epi32<const IMM8: i32>( |
1522 | src: __m512i, |
1523 | k: __mmask16, |
1524 | a: __m512i, |
1525 | b: __m512i, |
1526 | ) -> __m512i { |
1527 | static_assert_uimm_bits!(IMM8, 8); |
    let shf: i32x16 = vpshldvd(
        a.as_i32x16(),
        b.as_i32x16(),
        _mm512_set1_epi32(IMM8).as_i32x16(),
    );
    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
1534 | } |
1535 | |
1536 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1537 | /// |
1538 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi32&expand=5050) |
1539 | #[inline ] |
1540 | #[target_feature (enable = "avx512vbmi2" )] |
1541 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1542 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
1543 | #[rustc_legacy_const_generics (3)] |
1544 | pub unsafe fn _mm512_maskz_shldi_epi32<const IMM8: i32>( |
1545 | k: __mmask16, |
1546 | a: __m512i, |
1547 | b: __m512i, |
1548 | ) -> __m512i { |
1549 | static_assert_uimm_bits!(IMM8, 8); |
    let shf: i32x16 = vpshldvd(
        a.as_i32x16(),
        b.as_i32x16(),
        _mm512_set1_epi32(IMM8).as_i32x16(),
    );
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, shf, zero))
1557 | } |
1558 | |
1559 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst. |
1560 | /// |
1561 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi32&expand=5048) |
1562 | #[inline ] |
1563 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1564 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1565 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
1566 | #[rustc_legacy_const_generics (2)] |
1567 | pub unsafe fn _mm256_shldi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
1568 | static_assert_uimm_bits!(IMM8, 8); |
    transmute(vpshldvd256(
        a.as_i32x8(),
        b.as_i32x8(),
        _mm256_set1_epi32(IMM8).as_i32x8(),
    ))
1574 | } |
1575 | |
1576 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1577 | /// |
1578 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi32&expand=5046) |
1579 | #[inline ] |
1580 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1581 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1582 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
1583 | #[rustc_legacy_const_generics (4)] |
1584 | pub unsafe fn _mm256_mask_shldi_epi32<const IMM8: i32>( |
1585 | src: __m256i, |
1586 | k: __mmask8, |
1587 | a: __m256i, |
1588 | b: __m256i, |
1589 | ) -> __m256i { |
1590 | static_assert_uimm_bits!(IMM8, 8); |
    let shf: i32x8 = vpshldvd256(
        a.as_i32x8(),
        b.as_i32x8(),
        _mm256_set1_epi32(IMM8).as_i32x8(),
    );
    transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
1597 | } |
1598 | |
1599 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1600 | /// |
1601 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi32&expand=5047) |
1602 | #[inline ] |
1603 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1604 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1605 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
1606 | #[rustc_legacy_const_generics (3)] |
1607 | pub unsafe fn _mm256_maskz_shldi_epi32<const IMM8: i32>( |
1608 | k: __mmask8, |
1609 | a: __m256i, |
1610 | b: __m256i, |
1611 | ) -> __m256i { |
1612 | static_assert_uimm_bits!(IMM8, 8); |
    let shf: i32x8 = vpshldvd256(
        a.as_i32x8(),
        b.as_i32x8(),
        _mm256_set1_epi32(IMM8).as_i32x8(),
    );
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, shf, zero))
1620 | } |
1621 | |
1622 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst. |
1623 | /// |
1624 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi32&expand=5045) |
1625 | #[inline ] |
1626 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1627 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1628 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
1629 | #[rustc_legacy_const_generics (2)] |
1630 | pub unsafe fn _mm_shldi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
1631 | static_assert_uimm_bits!(IMM8, 8); |
    transmute(vpshldvd128(
        a.as_i32x4(),
        b.as_i32x4(),
        _mm_set1_epi32(IMM8).as_i32x4(),
    ))
1637 | } |
1638 | |
1639 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1640 | /// |
1641 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi32&expand=5043) |
1642 | #[inline ] |
1643 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1644 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1645 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
1646 | #[rustc_legacy_const_generics (4)] |
1647 | pub unsafe fn _mm_mask_shldi_epi32<const IMM8: i32>( |
1648 | src: __m128i, |
1649 | k: __mmask8, |
1650 | a: __m128i, |
1651 | b: __m128i, |
1652 | ) -> __m128i { |
1653 | static_assert_uimm_bits!(IMM8, 8); |
    let shf: i32x4 = vpshldvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(IMM8).as_i32x4());
    transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
1656 | } |
1657 | |
1658 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1659 | /// |
1660 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi32&expand=5044) |
1661 | #[inline ] |
1662 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1663 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1664 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
1665 | #[rustc_legacy_const_generics (3)] |
1666 | pub unsafe fn _mm_maskz_shldi_epi32<const IMM8: i32>( |
1667 | k: __mmask8, |
1668 | a: __m128i, |
1669 | b: __m128i, |
1670 | ) -> __m128i { |
1671 | static_assert_uimm_bits!(IMM8, 8); |
    let shf: i32x4 = vpshldvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(IMM8).as_i32x4());
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, shf, zero))
1675 | } |
1676 | |
/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1678 | /// |
1679 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi16&expand=5042) |
1680 | #[inline ] |
1681 | #[target_feature (enable = "avx512vbmi2" )] |
1682 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1683 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
1684 | #[rustc_legacy_const_generics (2)] |
1685 | pub unsafe fn _mm512_shldi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { |
1686 | static_assert_uimm_bits!(IMM8, 8); |
1687 | let imm8: i16 = IMM8 as i16; |
    transmute(vpshldvw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_set1_epi16(imm8).as_i16x32(),
    ))
1693 | } |
1694 | |
1695 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1696 | /// |
1697 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi16&expand=5040) |
1698 | #[inline ] |
1699 | #[target_feature (enable = "avx512vbmi2" )] |
1700 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1701 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
1702 | #[rustc_legacy_const_generics (4)] |
1703 | pub unsafe fn _mm512_mask_shldi_epi16<const IMM8: i32>( |
1704 | src: __m512i, |
1705 | k: __mmask32, |
1706 | a: __m512i, |
1707 | b: __m512i, |
1708 | ) -> __m512i { |
1709 | static_assert_uimm_bits!(IMM8, 8); |
1710 | let imm8: i16 = IMM8 as i16; |
    let shf: i16x32 = vpshldvw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_set1_epi16(imm8).as_i16x32(),
    );
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
1717 | } |
1718 | |
1719 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1720 | /// |
1721 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi16&expand=5041) |
1722 | #[inline ] |
1723 | #[target_feature (enable = "avx512vbmi2" )] |
1724 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1725 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
1726 | #[rustc_legacy_const_generics (3)] |
1727 | pub unsafe fn _mm512_maskz_shldi_epi16<const IMM8: i32>( |
1728 | k: __mmask32, |
1729 | a: __m512i, |
1730 | b: __m512i, |
1731 | ) -> __m512i { |
1732 | static_assert_uimm_bits!(IMM8, 8); |
1733 | let imm8: i16 = IMM8 as i16; |
    let shf: i16x32 = vpshldvw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_set1_epi16(imm8).as_i16x32(),
    );
    let zero: i16x32 = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, shf, zero))
1741 | } |
1742 | |
/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1744 | /// |
1745 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi16&expand=5039) |
1746 | #[inline ] |
1747 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1748 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1749 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
1750 | #[rustc_legacy_const_generics (2)] |
1751 | pub unsafe fn _mm256_shldi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
1752 | static_assert_uimm_bits!(IMM8, 8); |
1753 | let imm8: i16 = IMM8 as i16; |
    transmute(vpshldvw256(
        a.as_i16x16(),
        b.as_i16x16(),
        _mm256_set1_epi16(imm8).as_i16x16(),
    ))
1759 | } |
1760 | |
1761 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1762 | /// |
1763 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi16&expand=5037) |
1764 | #[inline ] |
1765 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1766 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1767 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
1768 | #[rustc_legacy_const_generics (4)] |
1769 | pub unsafe fn _mm256_mask_shldi_epi16<const IMM8: i32>( |
1770 | src: __m256i, |
1771 | k: __mmask16, |
1772 | a: __m256i, |
1773 | b: __m256i, |
1774 | ) -> __m256i { |
1775 | static_assert_uimm_bits!(IMM8, 8); |
1776 | let imm8: i16 = IMM8 as i16; |
    let shf: i16x16 = vpshldvw256(
        a.as_i16x16(),
        b.as_i16x16(),
        _mm256_set1_epi16(imm8).as_i16x16(),
    );
    transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
1783 | } |
1784 | |
1785 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1786 | /// |
1787 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi16&expand=5038) |
1788 | #[inline ] |
1789 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1790 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1791 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
1792 | #[rustc_legacy_const_generics (3)] |
1793 | pub unsafe fn _mm256_maskz_shldi_epi16<const IMM8: i32>( |
1794 | k: __mmask16, |
1795 | a: __m256i, |
1796 | b: __m256i, |
1797 | ) -> __m256i { |
1798 | static_assert_uimm_bits!(IMM8, 8); |
1799 | let imm8: i16 = IMM8 as i16; |
    let shf: i16x16 = vpshldvw256(
        a.as_i16x16(),
        b.as_i16x16(),
        _mm256_set1_epi16(imm8).as_i16x16(),
    );
    let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, shf, zero))
1807 | } |
1808 | |
/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1810 | /// |
1811 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi16&expand=5036) |
1812 | #[inline ] |
1813 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1814 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1815 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
1816 | #[rustc_legacy_const_generics (2)] |
1817 | pub unsafe fn _mm_shldi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
1818 | static_assert_uimm_bits!(IMM8, 8); |
1819 | let imm8: i16 = IMM8 as i16; |
    transmute(vpshldvw128(
        a.as_i16x8(),
        b.as_i16x8(),
        _mm_set1_epi16(imm8).as_i16x8(),
    ))
1825 | } |
1826 | |
1827 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1828 | /// |
1829 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi16&expand=5034) |
1830 | #[inline ] |
1831 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1832 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1833 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
1834 | #[rustc_legacy_const_generics (4)] |
1835 | pub unsafe fn _mm_mask_shldi_epi16<const IMM8: i32>( |
1836 | src: __m128i, |
1837 | k: __mmask8, |
1838 | a: __m128i, |
1839 | b: __m128i, |
1840 | ) -> __m128i { |
1841 | static_assert_uimm_bits!(IMM8, 8); |
1842 | let imm8: i16 = IMM8 as i16; |
    let shf: i16x8 = vpshldvw128(a.as_i16x8(), b.as_i16x8(), _mm_set1_epi16(imm8).as_i16x8());
    transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
1845 | } |
1846 | |
1847 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1848 | /// |
1849 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi16&expand=5035) |
1850 | #[inline ] |
1851 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1852 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1853 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
1854 | #[rustc_legacy_const_generics (3)] |
1855 | pub unsafe fn _mm_maskz_shldi_epi16<const IMM8: i32>( |
1856 | k: __mmask8, |
1857 | a: __m128i, |
1858 | b: __m128i, |
1859 | ) -> __m128i { |
1860 | static_assert_uimm_bits!(IMM8, 8); |
1861 | let imm8: i16 = IMM8 as i16; |
    let shf: i16x8 = vpshldvw128(a.as_i16x8(), b.as_i16x8(), _mm_set1_epi16(imm8).as_i16x8());
    let zero: i16x8 = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, shf, zero))
1865 | } |
1866 | |
1867 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst. |
1868 | /// |
1869 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi64&expand=5114) |
1870 | #[inline ] |
1871 | #[target_feature (enable = "avx512vbmi2" )] |
1872 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1873 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
1874 | #[rustc_legacy_const_generics (2)] |
1875 | pub unsafe fn _mm512_shrdi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { |
1876 | static_assert_uimm_bits!(IMM8, 8); |
1877 | let imm8: i64 = IMM8 as i64; |
    transmute(vpshrdvq(
        a.as_i64x8(),
        b.as_i64x8(),
        _mm512_set1_epi64(imm8).as_i64x8(),
    ))
1883 | } |
1884 | |
/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1886 | /// |
1887 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi64&expand=5112) |
1888 | #[inline ] |
1889 | #[target_feature (enable = "avx512vbmi2" )] |
1890 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1891 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
1892 | #[rustc_legacy_const_generics (4)] |
1893 | pub unsafe fn _mm512_mask_shrdi_epi64<const IMM8: i32>( |
1894 | src: __m512i, |
1895 | k: __mmask8, |
1896 | a: __m512i, |
1897 | b: __m512i, |
1898 | ) -> __m512i { |
1899 | static_assert_uimm_bits!(IMM8, 8); |
1900 | let imm8: i64 = IMM8 as i64; |
let shf: i64x8 = vpshrdvq(
a.as_i64x8(),
b.as_i64x8(),
_mm512_set1_epi64(imm8).as_i64x8(),
);
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
1907 | } |
1908 | |
1909 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1910 | /// |
1911 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi64&expand=5113) |
1912 | #[inline ] |
1913 | #[target_feature (enable = "avx512vbmi2" )] |
1914 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1915 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 255))] //should be vpshrdq |
1916 | #[rustc_legacy_const_generics (3)] |
1917 | pub unsafe fn _mm512_maskz_shrdi_epi64<const IMM8: i32>( |
1918 | k: __mmask8, |
1919 | a: __m512i, |
1920 | b: __m512i, |
1921 | ) -> __m512i { |
1922 | static_assert_uimm_bits!(IMM8, 8); |
1923 | let imm8: i64 = IMM8 as i64; |
let shf: i64x8 = vpshrdvq(
a.as_i64x8(),
b.as_i64x8(),
_mm512_set1_epi64(imm8).as_i64x8(),
);
let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, shf, zero))
1931 | } |
1932 | |
1933 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst. |
1934 | /// |
1935 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi64&expand=5111) |
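///
/// A minimal usage sketch (illustrative only, not run as a doctest), assuming
/// `avx512vbmi2` and `avx512vl` are available at runtime. With both operands equal the
/// operation reduces to a per-element rotate right:
///
/// ```ignore
/// let a = _mm256_set1_epi64x(2);
/// // Each lane: low 64 bits of (a:a) >> 1 = 1.
/// let r = _mm256_shrdi_epi64::<1>(a, a);
/// ```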
1936 | #[inline ] |
1937 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1938 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1939 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
1940 | #[rustc_legacy_const_generics (2)] |
1941 | pub unsafe fn _mm256_shrdi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
1942 | static_assert_uimm_bits!(IMM8, 8); |
1943 | let imm8: i64 = IMM8 as i64; |
transmute(vpshrdvq256(
a.as_i64x4(),
b.as_i64x4(),
_mm256_set1_epi64x(imm8).as_i64x4(),
))
1949 | } |
1950 | |
/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1952 | /// |
1953 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi64&expand=5109) |
1954 | #[inline ] |
1955 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1956 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1957 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
1958 | #[rustc_legacy_const_generics (4)] |
1959 | pub unsafe fn _mm256_mask_shrdi_epi64<const IMM8: i32>( |
1960 | src: __m256i, |
1961 | k: __mmask8, |
1962 | a: __m256i, |
1963 | b: __m256i, |
1964 | ) -> __m256i { |
1965 | static_assert_uimm_bits!(IMM8, 8); |
1966 | let imm8: i64 = IMM8 as i64; |
let shf: i64x4 = vpshrdvq256(
a.as_i64x4(),
b.as_i64x4(),
_mm256_set1_epi64x(imm8).as_i64x4(),
);
transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
1973 | } |
1974 | |
1975 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1976 | /// |
1977 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi64&expand=5110) |
1978 | #[inline ] |
1979 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1980 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1981 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
1982 | #[rustc_legacy_const_generics (3)] |
1983 | pub unsafe fn _mm256_maskz_shrdi_epi64<const IMM8: i32>( |
1984 | k: __mmask8, |
1985 | a: __m256i, |
1986 | b: __m256i, |
1987 | ) -> __m256i { |
1988 | static_assert_uimm_bits!(IMM8, 8); |
1989 | let imm8: i64 = IMM8 as i64; |
let shf: i64x4 = vpshrdvq256(
a.as_i64x4(),
b.as_i64x4(),
_mm256_set1_epi64x(imm8).as_i64x4(),
);
let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, shf, zero))
1997 | } |
1998 | |
1999 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst. |
2000 | /// |
2001 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi64&expand=5108) |
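///
/// A minimal usage sketch (illustrative only, not run as a doctest), assuming
/// `avx512vbmi2` and `avx512vl` are available at runtime. With both operands equal the
/// operation reduces to a per-element rotate right:
///
/// ```ignore
/// let a = _mm_set1_epi64x(2);
/// // Each lane: low 64 bits of (a:a) >> 1 = 1.
/// let r = _mm_shrdi_epi64::<1>(a, a);
/// ```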
2002 | #[inline ] |
2003 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2004 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2005 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
2006 | #[rustc_legacy_const_generics (2)] |
2007 | pub unsafe fn _mm_shrdi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
2008 | static_assert_uimm_bits!(IMM8, 8); |
2009 | let imm8: i64 = IMM8 as i64; |
transmute(vpshrdvq128(
a.as_i64x2(),
b.as_i64x2(),
_mm_set1_epi64x(imm8).as_i64x2(),
))
2015 | } |
2016 | |
/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2018 | /// |
2019 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi64&expand=5106) |
2020 | #[inline ] |
2021 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2022 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2023 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
2024 | #[rustc_legacy_const_generics (4)] |
2025 | pub unsafe fn _mm_mask_shrdi_epi64<const IMM8: i32>( |
2026 | src: __m128i, |
2027 | k: __mmask8, |
2028 | a: __m128i, |
2029 | b: __m128i, |
2030 | ) -> __m128i { |
2031 | static_assert_uimm_bits!(IMM8, 8); |
2032 | let imm8: i64 = IMM8 as i64; |
let shf: i64x2 = vpshrdvq128(a.as_i64x2(), b.as_i64x2(), _mm_set1_epi64x(imm8).as_i64x2());
transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
2035 | } |
2036 | |
2037 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2038 | /// |
2039 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi64&expand=5107) |
2040 | #[inline ] |
2041 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2042 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2043 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
2044 | #[rustc_legacy_const_generics (3)] |
2045 | pub unsafe fn _mm_maskz_shrdi_epi64<const IMM8: i32>( |
2046 | k: __mmask8, |
2047 | a: __m128i, |
2048 | b: __m128i, |
2049 | ) -> __m128i { |
2050 | static_assert_uimm_bits!(IMM8, 8); |
2051 | let imm8: i64 = IMM8 as i64; |
let shf: i64x2 = vpshrdvq128(a.as_i64x2(), b.as_i64x2(), _mm_set1_epi64x(imm8).as_i64x2());
let zero: i64x2 = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, shf, zero))
2055 | } |
2056 | |
2057 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst. |
2058 | /// |
2059 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi32&expand=5105) |
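///
/// A minimal usage sketch (illustrative only, not run as a doctest), assuming the
/// `avx512vbmi2` target feature is available at runtime. With both operands equal the
/// operation reduces to a per-element rotate right:
///
/// ```ignore
/// let a = _mm512_set1_epi32(4);
/// // Each lane: low 32 bits of (a:a) >> 2, i.e. 4 rotated right by 2 = 1.
/// let r = _mm512_shrdi_epi32::<2>(a, a);
/// ```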
2060 | #[inline ] |
2061 | #[target_feature (enable = "avx512vbmi2" )] |
2062 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
#[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2064 | #[rustc_legacy_const_generics (2)] |
2065 | pub unsafe fn _mm512_shrdi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { |
2066 | static_assert_uimm_bits!(IMM8, 8); |
transmute(vpshrdvd(
a.as_i32x16(),
b.as_i32x16(),
_mm512_set1_epi32(IMM8).as_i32x16(),
))
2072 | } |
2073 | |
2074 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
2075 | /// |
2076 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi32&expand=5103) |
2077 | #[inline ] |
2078 | #[target_feature (enable = "avx512vbmi2" )] |
2079 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
#[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2081 | #[rustc_legacy_const_generics (4)] |
2082 | pub unsafe fn _mm512_mask_shrdi_epi32<const IMM8: i32>( |
2083 | src: __m512i, |
2084 | k: __mmask16, |
2085 | a: __m512i, |
2086 | b: __m512i, |
2087 | ) -> __m512i { |
2088 | static_assert_uimm_bits!(IMM8, 8); |
let shf: i32x16 = vpshrdvd(
a.as_i32x16(),
b.as_i32x16(),
_mm512_set1_epi32(IMM8).as_i32x16(),
);
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
2095 | } |
2096 | |
2097 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2098 | /// |
2099 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi32&expand=5104) |
2100 | #[inline ] |
2101 | #[target_feature (enable = "avx512vbmi2" )] |
2102 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
#[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2104 | #[rustc_legacy_const_generics (3)] |
2105 | pub unsafe fn _mm512_maskz_shrdi_epi32<const IMM8: i32>( |
2106 | k: __mmask16, |
2107 | a: __m512i, |
2108 | b: __m512i, |
2109 | ) -> __m512i { |
2110 | static_assert_uimm_bits!(IMM8, 8); |
let shf: i32x16 = vpshrdvd(
a.as_i32x16(),
b.as_i32x16(),
_mm512_set1_epi32(IMM8).as_i32x16(),
);
let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shf, zero))
2118 | } |
2119 | |
2120 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst. |
2121 | /// |
2122 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi32&expand=5102) |
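///
/// A minimal usage sketch (illustrative only, not run as a doctest), assuming
/// `avx512vbmi2` and `avx512vl` are available at runtime. With both operands equal the
/// operation reduces to a per-element rotate right:
///
/// ```ignore
/// let a = _mm256_set1_epi32(4);
/// // Each lane: low 32 bits of (a:a) >> 2 = 1.
/// let r = _mm256_shrdi_epi32::<2>(a, a);
/// ```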
2123 | #[inline ] |
2124 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2125 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
#[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2127 | #[rustc_legacy_const_generics (2)] |
2128 | pub unsafe fn _mm256_shrdi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
2129 | static_assert_uimm_bits!(IMM8, 8); |
transmute(vpshrdvd256(
a.as_i32x8(),
b.as_i32x8(),
_mm256_set1_epi32(IMM8).as_i32x8(),
))
2135 | } |
2136 | |
2137 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
2138 | /// |
2139 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi32&expand=5100) |
2140 | #[inline ] |
2141 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2142 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
#[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2144 | #[rustc_legacy_const_generics (4)] |
2145 | pub unsafe fn _mm256_mask_shrdi_epi32<const IMM8: i32>( |
2146 | src: __m256i, |
2147 | k: __mmask8, |
2148 | a: __m256i, |
2149 | b: __m256i, |
2150 | ) -> __m256i { |
2151 | static_assert_uimm_bits!(IMM8, 8); |
let shf: i32x8 = vpshrdvd256(
a.as_i32x8(),
b.as_i32x8(),
_mm256_set1_epi32(IMM8).as_i32x8(),
);
transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
2158 | } |
2159 | |
2160 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2161 | /// |
2162 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi32&expand=5101) |
2163 | #[inline ] |
2164 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2165 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
#[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2167 | #[rustc_legacy_const_generics (3)] |
2168 | pub unsafe fn _mm256_maskz_shrdi_epi32<const IMM8: i32>( |
2169 | k: __mmask8, |
2170 | a: __m256i, |
2171 | b: __m256i, |
2172 | ) -> __m256i { |
2173 | static_assert_uimm_bits!(IMM8, 8); |
let shf: i32x8 = vpshrdvd256(
a.as_i32x8(),
b.as_i32x8(),
_mm256_set1_epi32(IMM8).as_i32x8(),
);
let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, shf, zero))
2181 | } |
2182 | |
2183 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst. |
2184 | /// |
2185 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi32&expand=5099) |
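///
/// A minimal usage sketch (illustrative only, not run as a doctest), assuming
/// `avx512vbmi2` and `avx512vl` are available at runtime. With both operands equal the
/// operation reduces to a per-element rotate right:
///
/// ```ignore
/// let a = _mm_set1_epi32(4);
/// // Each lane: low 32 bits of (a:a) >> 2 = 1.
/// let r = _mm_shrdi_epi32::<2>(a, a);
/// ```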
2186 | #[inline ] |
2187 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2188 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
#[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2190 | #[rustc_legacy_const_generics (2)] |
2191 | pub unsafe fn _mm_shrdi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
2192 | static_assert_uimm_bits!(IMM8, 8); |
transmute(vpshrdvd128(
a.as_i32x4(),
b.as_i32x4(),
_mm_set1_epi32(IMM8).as_i32x4(),
))
2198 | } |
2199 | |
2200 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
2201 | /// |
2202 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi32&expand=5097) |
2203 | #[inline ] |
2204 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2205 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
#[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2207 | #[rustc_legacy_const_generics (4)] |
2208 | pub unsafe fn _mm_mask_shrdi_epi32<const IMM8: i32>( |
2209 | src: __m128i, |
2210 | k: __mmask8, |
2211 | a: __m128i, |
2212 | b: __m128i, |
2213 | ) -> __m128i { |
2214 | static_assert_uimm_bits!(IMM8, 8); |
let shf: i32x4 = vpshrdvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(IMM8).as_i32x4());
transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
2217 | } |
2218 | |
2219 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2220 | /// |
2221 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi32&expand=5098) |
2222 | #[inline ] |
2223 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2224 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
#[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2226 | #[rustc_legacy_const_generics (3)] |
2227 | pub unsafe fn _mm_maskz_shrdi_epi32<const IMM8: i32>( |
2228 | k: __mmask8, |
2229 | a: __m128i, |
2230 | b: __m128i, |
2231 | ) -> __m128i { |
2232 | static_assert_uimm_bits!(IMM8, 8); |
let shf: i32x4 = vpshrdvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(IMM8).as_i32x4());
let zero: i32x4 = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, shf, zero))
2236 | } |
2237 | |
2238 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst. |
2239 | /// |
2240 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi16&expand=5096) |
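///
/// A minimal usage sketch (illustrative only, not run as a doctest), assuming the
/// `avx512vbmi2` target feature is available at runtime. With both operands equal the
/// operation reduces to a per-element rotate right:
///
/// ```ignore
/// let a = _mm512_set1_epi16(8);
/// // Each lane: low 16 bits of (a:a) >> 3, i.e. 8 rotated right by 3 = 1.
/// let r = _mm512_shrdi_epi16::<3>(a, a);
/// ```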
2241 | #[inline ] |
2242 | #[target_feature (enable = "avx512vbmi2" )] |
2243 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2244 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
2245 | #[rustc_legacy_const_generics (2)] |
2246 | pub unsafe fn _mm512_shrdi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { |
2247 | static_assert_uimm_bits!(IMM8, 8); |
2248 | let imm8: i16 = IMM8 as i16; |
transmute(vpshrdvw(
a.as_i16x32(),
b.as_i16x32(),
_mm512_set1_epi16(imm8).as_i16x32(),
))
2255 | } |
2256 | |
2257 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
2258 | /// |
2259 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi16&expand=5094) |
2260 | #[inline ] |
2261 | #[target_feature (enable = "avx512vbmi2" )] |
2262 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2263 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
2264 | #[rustc_legacy_const_generics (4)] |
2265 | pub unsafe fn _mm512_mask_shrdi_epi16<const IMM8: i32>( |
2266 | src: __m512i, |
2267 | k: __mmask32, |
2268 | a: __m512i, |
2269 | b: __m512i, |
2270 | ) -> __m512i { |
2271 | static_assert_uimm_bits!(IMM8, 8); |
2272 | let imm8: i16 = IMM8 as i16; |
let shf: i16x32 = vpshrdvw(
a.as_i16x32(),
b.as_i16x32(),
_mm512_set1_epi16(imm8).as_i16x32(),
);
transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
2280 | } |
2281 | |
2282 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2283 | /// |
2284 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi16&expand=5095) |
2285 | #[inline ] |
2286 | #[target_feature (enable = "avx512vbmi2" )] |
2287 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2288 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
2289 | #[rustc_legacy_const_generics (3)] |
2290 | pub unsafe fn _mm512_maskz_shrdi_epi16<const IMM8: i32>( |
2291 | k: __mmask32, |
2292 | a: __m512i, |
2293 | b: __m512i, |
2294 | ) -> __m512i { |
2295 | static_assert_uimm_bits!(IMM8, 8); |
2296 | let imm8: i16 = IMM8 as i16; |
let shf: i16x32 = vpshrdvw(
a.as_i16x32(),
b.as_i16x32(),
_mm512_set1_epi16(imm8).as_i16x32(),
);
let zero: i16x32 = _mm512_setzero_si512().as_i16x32();
transmute(simd_select_bitmask(k, shf, zero))
2305 | } |
2306 | |
2307 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst. |
2308 | /// |
2309 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi16&expand=5093) |
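///
/// A minimal usage sketch (illustrative only, not run as a doctest), assuming
/// `avx512vbmi2` and `avx512vl` are available at runtime. With both operands equal the
/// operation reduces to a per-element rotate right:
///
/// ```ignore
/// let a = _mm256_set1_epi16(8);
/// // Each lane: low 16 bits of (a:a) >> 3 = 1.
/// let r = _mm256_shrdi_epi16::<3>(a, a);
/// ```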
2310 | #[inline ] |
2311 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2312 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2313 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
2314 | #[rustc_legacy_const_generics (2)] |
2315 | pub unsafe fn _mm256_shrdi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
2316 | static_assert_uimm_bits!(IMM8, 8); |
2317 | let imm8: i16 = IMM8 as i16; |
transmute(vpshrdvw256(
a.as_i16x16(),
b.as_i16x16(),
_mm256_set1_epi16(imm8).as_i16x16(),
))
2324 | } |
2325 | |
2326 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
2327 | /// |
2328 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi16&expand=5091) |
2329 | #[inline ] |
2330 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2331 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2332 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
2333 | #[rustc_legacy_const_generics (4)] |
2334 | pub unsafe fn _mm256_mask_shrdi_epi16<const IMM8: i32>( |
2335 | src: __m256i, |
2336 | k: __mmask16, |
2337 | a: __m256i, |
2338 | b: __m256i, |
2339 | ) -> __m256i { |
2340 | static_assert_uimm_bits!(IMM8, 8); |
2341 | let imm8: i16 = IMM8 as i16; |
let shf: i16x16 = vpshrdvw256(
a.as_i16x16(),
b.as_i16x16(),
_mm256_set1_epi16(imm8).as_i16x16(),
);
transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
2349 | } |
2350 | |
2351 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2352 | /// |
2353 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi16&expand=5092) |
2354 | #[inline ] |
2355 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2356 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2357 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
2358 | #[rustc_legacy_const_generics (3)] |
2359 | pub unsafe fn _mm256_maskz_shrdi_epi16<const IMM8: i32>( |
2360 | k: __mmask16, |
2361 | a: __m256i, |
2362 | b: __m256i, |
2363 | ) -> __m256i { |
2364 | static_assert_uimm_bits!(IMM8, 8); |
2365 | let imm8: i16 = IMM8 as i16; |
let shf: i16x16 = vpshrdvw256(
a.as_i16x16(),
b.as_i16x16(),
_mm256_set1_epi16(imm8).as_i16x16(),
);
let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
transmute(simd_select_bitmask(k, shf, zero))
2373 | } |
2374 | |
2375 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst. |
2376 | /// |
2377 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi16&expand=5090) |
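///
/// A minimal usage sketch (illustrative only, not run as a doctest), assuming
/// `avx512vbmi2` and `avx512vl` are available at runtime. With both operands equal the
/// operation reduces to a per-element rotate right:
///
/// ```ignore
/// let a = _mm_set1_epi16(8);
/// // Each lane: low 16 bits of (a:a) >> 3 = 1.
/// let r = _mm_shrdi_epi16::<3>(a, a);
/// ```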
2378 | #[inline ] |
2379 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2380 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2381 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
2382 | #[rustc_legacy_const_generics (2)] |
2383 | pub unsafe fn _mm_shrdi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
2384 | static_assert_uimm_bits!(IMM8, 8); |
2385 | let imm8: i16 = IMM8 as i16; |
transmute(vpshrdvw128(
a.as_i16x8(),
b.as_i16x8(),
_mm_set1_epi16(imm8).as_i16x8(),
))
2391 | } |
2392 | |
2393 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
2394 | /// |
2395 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi16&expand=5088) |
2396 | #[inline ] |
2397 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2398 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2399 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
2400 | #[rustc_legacy_const_generics (4)] |
2401 | pub unsafe fn _mm_mask_shrdi_epi16<const IMM8: i32>( |
2402 | src: __m128i, |
2403 | k: __mmask8, |
2404 | a: __m128i, |
2405 | b: __m128i, |
2406 | ) -> __m128i { |
2407 | static_assert_uimm_bits!(IMM8, 8); |
2408 | let imm8: i16 = IMM8 as i16; |
let shf: i16x8 = vpshrdvw128(a.as_i16x8(), b.as_i16x8(), _mm_set1_epi16(imm8).as_i16x8());
transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
2411 | } |
2412 | |
2413 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2414 | /// |
2415 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi16&expand=5089) |
2416 | #[inline ] |
2417 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2418 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2419 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
2420 | #[rustc_legacy_const_generics (3)] |
2421 | pub unsafe fn _mm_maskz_shrdi_epi16<const IMM8: i32>( |
2422 | k: __mmask8, |
2423 | a: __m128i, |
2424 | b: __m128i, |
2425 | ) -> __m128i { |
2426 | static_assert_uimm_bits!(IMM8, 8); |
2427 | let imm8: i16 = IMM8 as i16; |
let shf: i16x8 = vpshrdvw128(a.as_i16x8(), b.as_i16x8(), _mm_set1_epi16(imm8).as_i16x8());
let zero: i16x8 = _mm_setzero_si128().as_i16x8();
transmute(simd_select_bitmask(k, shf, zero))
2431 | } |
2432 | |
2433 | #[allow (improper_ctypes)] |
2434 | extern "C" { |
2435 | #[link_name = "llvm.x86.avx512.mask.compress.store.w.512" ] |
2436 | fn vcompressstorew(mem: *mut i8, data: i16x32, mask: u32); |
2437 | #[link_name = "llvm.x86.avx512.mask.compress.store.w.256" ] |
2438 | fn vcompressstorew256(mem: *mut i8, data: i16x16, mask: u16); |
2439 | #[link_name = "llvm.x86.avx512.mask.compress.store.w.128" ] |
2440 | fn vcompressstorew128(mem: *mut i8, data: i16x8, mask: u8); |
2441 | |
2442 | #[link_name = "llvm.x86.avx512.mask.compress.store.b.512" ] |
2443 | fn vcompressstoreb(mem: *mut i8, data: i8x64, mask: u64); |
2444 | #[link_name = "llvm.x86.avx512.mask.compress.store.b.256" ] |
2445 | fn vcompressstoreb256(mem: *mut i8, data: i8x32, mask: u32); |
2446 | #[link_name = "llvm.x86.avx512.mask.compress.store.b.128" ] |
2447 | fn vcompressstoreb128(mem: *mut i8, data: i8x16, mask: u16); |
2448 | |
2449 | #[link_name = "llvm.x86.avx512.mask.compress.w.512" ] |
2450 | fn vpcompressw(a: i16x32, src: i16x32, mask: u32) -> i16x32; |
2451 | #[link_name = "llvm.x86.avx512.mask.compress.w.256" ] |
2452 | fn vpcompressw256(a: i16x16, src: i16x16, mask: u16) -> i16x16; |
2453 | #[link_name = "llvm.x86.avx512.mask.compress.w.128" ] |
2454 | fn vpcompressw128(a: i16x8, src: i16x8, mask: u8) -> i16x8; |
2455 | |
2456 | #[link_name = "llvm.x86.avx512.mask.compress.b.512" ] |
2457 | fn vpcompressb(a: i8x64, src: i8x64, mask: u64) -> i8x64; |
2458 | #[link_name = "llvm.x86.avx512.mask.compress.b.256" ] |
2459 | fn vpcompressb256(a: i8x32, src: i8x32, mask: u32) -> i8x32; |
2460 | #[link_name = "llvm.x86.avx512.mask.compress.b.128" ] |
2461 | fn vpcompressb128(a: i8x16, src: i8x16, mask: u16) -> i8x16; |
2462 | |
2463 | #[link_name = "llvm.x86.avx512.mask.expand.w.512" ] |
2464 | fn vpexpandw(a: i16x32, src: i16x32, mask: u32) -> i16x32; |
2465 | #[link_name = "llvm.x86.avx512.mask.expand.w.256" ] |
2466 | fn vpexpandw256(a: i16x16, src: i16x16, mask: u16) -> i16x16; |
2467 | #[link_name = "llvm.x86.avx512.mask.expand.w.128" ] |
2468 | fn vpexpandw128(a: i16x8, src: i16x8, mask: u8) -> i16x8; |
2469 | |
2470 | #[link_name = "llvm.x86.avx512.mask.expand.b.512" ] |
2471 | fn vpexpandb(a: i8x64, src: i8x64, mask: u64) -> i8x64; |
2472 | #[link_name = "llvm.x86.avx512.mask.expand.b.256" ] |
2473 | fn vpexpandb256(a: i8x32, src: i8x32, mask: u32) -> i8x32; |
2474 | #[link_name = "llvm.x86.avx512.mask.expand.b.128" ] |
2475 | fn vpexpandb128(a: i8x16, src: i8x16, mask: u16) -> i8x16; |
2476 | |
2477 | #[link_name = "llvm.fshl.v8i64" ] |
2478 | fn vpshldvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8; |
2479 | #[link_name = "llvm.fshl.v4i64" ] |
2480 | fn vpshldvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4; |
2481 | #[link_name = "llvm.fshl.v2i64" ] |
2482 | fn vpshldvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2; |
2483 | #[link_name = "llvm.fshl.v16i32" ] |
2484 | fn vpshldvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16; |
2485 | #[link_name = "llvm.fshl.v8i32" ] |
2486 | fn vpshldvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8; |
2487 | #[link_name = "llvm.fshl.v4i32" ] |
2488 | fn vpshldvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4; |
2489 | #[link_name = "llvm.fshl.v32i16" ] |
2490 | fn vpshldvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32; |
2491 | #[link_name = "llvm.fshl.v16i16" ] |
2492 | fn vpshldvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16; |
2493 | #[link_name = "llvm.fshl.v8i16" ] |
2494 | fn vpshldvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8; |
2495 | |
2496 | #[link_name = "llvm.fshr.v8i64" ] |
2497 | fn vpshrdvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8; |
2498 | #[link_name = "llvm.fshr.v4i64" ] |
2499 | fn vpshrdvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4; |
2500 | #[link_name = "llvm.fshr.v2i64" ] |
2501 | fn vpshrdvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2; |
2502 | #[link_name = "llvm.fshr.v16i32" ] |
2503 | fn vpshrdvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16; |
2504 | #[link_name = "llvm.fshr.v8i32" ] |
2505 | fn vpshrdvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8; |
2506 | #[link_name = "llvm.fshr.v4i32" ] |
2507 | fn vpshrdvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4; |
2508 | #[link_name = "llvm.fshr.v32i16" ] |
2509 | fn vpshrdvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32; |
2510 | #[link_name = "llvm.fshr.v16i16" ] |
2511 | fn vpshrdvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16; |
2512 | #[link_name = "llvm.fshr.v8i16" ] |
2513 | fn vpshrdvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8; |
2514 | } |
2515 | |
2516 | #[cfg (test)] |
2517 | mod tests { |
2518 | |
2519 | use stdarch_test::simd_test; |
2520 | |
2521 | use crate::core_arch::x86::*; |
2522 | use crate::hint::black_box; |
2523 | |
2524 | #[simd_test(enable = "avx512vbmi2" )] |
2525 | unsafe fn test_mm512_mask_compress_epi16() { |
2526 | let src = _mm512_set1_epi16(200); |
2527 | #[rustfmt::skip] |
2528 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2529 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
2530 | let r = _mm512_mask_compress_epi16(src, 0b01010101_01010101_01010101_01010101, a); |
2531 | #[rustfmt::skip] |
2532 | let e = _mm512_set_epi16( |
2533 | 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, |
2534 | 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, |
2535 | ); |
2536 | assert_eq_m512i(r, e); |
2537 | } |
2538 | |
2539 | #[simd_test(enable = "avx512vbmi2" )] |
2540 | unsafe fn test_mm512_maskz_compress_epi16() { |
2541 | #[rustfmt::skip] |
2542 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2543 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
2544 | let r = _mm512_maskz_compress_epi16(0b01010101_01010101_01010101_01010101, a); |
2545 | #[rustfmt::skip] |
2546 | let e = _mm512_set_epi16( |
2547 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2548 | 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, |
2549 | ); |
2550 | assert_eq_m512i(r, e); |
2551 | } |
2552 | |
2553 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2554 | unsafe fn test_mm256_mask_compress_epi16() { |
2555 | let src = _mm256_set1_epi16(200); |
2556 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
2557 | let r = _mm256_mask_compress_epi16(src, 0b01010101_01010101, a); |
2558 | let e = _mm256_set_epi16( |
2559 | 200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15, |
2560 | ); |
2561 | assert_eq_m256i(r, e); |
2562 | } |
2563 | |
2564 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2565 | unsafe fn test_mm256_maskz_compress_epi16() { |
2566 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
2567 | let r = _mm256_maskz_compress_epi16(0b01010101_01010101, a); |
2568 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15); |
2569 | assert_eq_m256i(r, e); |
2570 | } |
2571 | |
2572 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2573 | unsafe fn test_mm_mask_compress_epi16() { |
2574 | let src = _mm_set1_epi16(200); |
2575 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
2576 | let r = _mm_mask_compress_epi16(src, 0b01010101, a); |
2577 | let e = _mm_set_epi16(200, 200, 200, 200, 1, 3, 5, 7); |
2578 | assert_eq_m128i(r, e); |
2579 | } |
2580 | |
2581 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2582 | unsafe fn test_mm_maskz_compress_epi16() { |
2583 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
2584 | let r = _mm_maskz_compress_epi16(0b01010101, a); |
2585 | let e = _mm_set_epi16(0, 0, 0, 0, 1, 3, 5, 7); |
2586 | assert_eq_m128i(r, e); |
2587 | } |
2588 | |
2589 | #[simd_test(enable = "avx512vbmi2" )] |
2590 | unsafe fn test_mm512_mask_compress_epi8() { |
2591 | let src = _mm512_set1_epi8(100); |
2592 | #[rustfmt::skip] |
2593 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2594 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
2595 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
2596 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
2597 | let r = _mm512_mask_compress_epi8( |
2598 | src, |
2599 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101, |
2600 | a, |
2601 | ); |
2602 | #[rustfmt::skip] |
2603 | let e = _mm512_set_epi8( |
2604 | 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, |
2605 | 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, |
2606 | 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, |
2607 | 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, |
2608 | ); |
2609 | assert_eq_m512i(r, e); |
2610 | } |
2611 | |
2612 | #[simd_test(enable = "avx512vbmi2" )] |
2613 | unsafe fn test_mm512_maskz_compress_epi8() { |
2614 | #[rustfmt::skip] |
2615 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2616 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
2617 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
2618 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
2619 | let r = _mm512_maskz_compress_epi8( |
2620 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101, |
2621 | a, |
2622 | ); |
2623 | #[rustfmt::skip] |
2624 | let e = _mm512_set_epi8( |
2625 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2626 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2627 | 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, |
2628 | 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, |
2629 | ); |
2630 | assert_eq_m512i(r, e); |
2631 | } |
2632 | |
2633 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2634 | unsafe fn test_mm256_mask_compress_epi8() { |
2635 | let src = _mm256_set1_epi8(100); |
2636 | #[rustfmt::skip] |
2637 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2638 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
2639 | let r = _mm256_mask_compress_epi8(src, 0b01010101_01010101_01010101_01010101, a); |
2640 | #[rustfmt::skip] |
2641 | let e = _mm256_set_epi8( |
2642 | 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, |
2643 | 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, |
2644 | ); |
2645 | assert_eq_m256i(r, e); |
2646 | } |
2647 | |
2648 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2649 | unsafe fn test_mm256_maskz_compress_epi8() { |
2650 | #[rustfmt::skip] |
2651 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2652 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
2653 | let r = _mm256_maskz_compress_epi8(0b01010101_01010101_01010101_01010101, a); |
2654 | #[rustfmt::skip] |
2655 | let e = _mm256_set_epi8( |
2656 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2657 | 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, |
2658 | ); |
2659 | assert_eq_m256i(r, e); |
2660 | } |
2661 | |
2662 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2663 | unsafe fn test_mm_mask_compress_epi8() { |
2664 | let src = _mm_set1_epi8(100); |
2665 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
2666 | let r = _mm_mask_compress_epi8(src, 0b01010101_01010101, a); |
2667 | let e = _mm_set_epi8( |
2668 | 100, 100, 100, 100, 100, 100, 100, 100, 1, 3, 5, 7, 9, 11, 13, 15, |
2669 | ); |
2670 | assert_eq_m128i(r, e); |
2671 | } |
2672 | |
2673 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2674 | unsafe fn test_mm_maskz_compress_epi8() { |
2675 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
2676 | let r = _mm_maskz_compress_epi8(0b01010101_01010101, a); |
2677 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15); |
2678 | assert_eq_m128i(r, e); |
2679 | } |
2680 | |
2681 | #[simd_test(enable = "avx512vbmi2" )] |
2682 | unsafe fn test_mm512_mask_expand_epi16() { |
2683 | let src = _mm512_set1_epi16(200); |
2684 | #[rustfmt::skip] |
2685 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2686 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
2687 | let r = _mm512_mask_expand_epi16(src, 0b01010101_01010101_01010101_01010101, a); |
2688 | #[rustfmt::skip] |
2689 | let e = _mm512_set_epi16( |
2690 | 200, 16, 200, 17, 200, 18, 200, 19, 200, 20, 200, 21, 200, 22, 200, 23, |
2691 | 200, 24, 200, 25, 200, 26, 200, 27, 200, 28, 200, 29, 200, 30, 200, 31, |
2692 | ); |
2693 | assert_eq_m512i(r, e); |
2694 | } |
2695 | |
2696 | #[simd_test(enable = "avx512vbmi2" )] |
2697 | unsafe fn test_mm512_maskz_expand_epi16() { |
2698 | #[rustfmt::skip] |
2699 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2700 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
2701 | let r = _mm512_maskz_expand_epi16(0b01010101_01010101_01010101_01010101, a); |
2702 | #[rustfmt::skip] |
2703 | let e = _mm512_set_epi16(0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23, |
2704 | 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31); |
2705 | assert_eq_m512i(r, e); |
2706 | } |
2707 | |
2708 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2709 | unsafe fn test_mm256_mask_expand_epi16() { |
2710 | let src = _mm256_set1_epi16(200); |
2711 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
2712 | let r = _mm256_mask_expand_epi16(src, 0b01010101_01010101, a); |
2713 | let e = _mm256_set_epi16( |
2714 | 200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15, |
2715 | ); |
2716 | assert_eq_m256i(r, e); |
2717 | } |
2718 | |
2719 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2720 | unsafe fn test_mm256_maskz_expand_epi16() { |
2721 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
2722 | let r = _mm256_maskz_expand_epi16(0b01010101_01010101, a); |
2723 | let e = _mm256_set_epi16(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15); |
2724 | assert_eq_m256i(r, e); |
2725 | } |
2726 | |
2727 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2728 | unsafe fn test_mm_mask_expand_epi16() { |
2729 | let src = _mm_set1_epi16(200); |
2730 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
2731 | let r = _mm_mask_expand_epi16(src, 0b01010101, a); |
2732 | let e = _mm_set_epi16(200, 4, 200, 5, 200, 6, 200, 7); |
2733 | assert_eq_m128i(r, e); |
2734 | } |
2735 | |
2736 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2737 | unsafe fn test_mm_maskz_expand_epi16() { |
2738 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
2739 | let r = _mm_maskz_expand_epi16(0b01010101, a); |
2740 | let e = _mm_set_epi16(0, 4, 0, 5, 0, 6, 0, 7); |
2741 | assert_eq_m128i(r, e); |
2742 | } |
2743 | |
2744 | #[simd_test(enable = "avx512vbmi2" )] |
2745 | unsafe fn test_mm512_mask_expand_epi8() { |
2746 | let src = _mm512_set1_epi8(100); |
2747 | #[rustfmt::skip] |
2748 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2749 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
2750 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
2751 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
2752 | let r = _mm512_mask_expand_epi8( |
2753 | src, |
2754 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101, |
2755 | a, |
2756 | ); |
2757 | #[rustfmt::skip] |
2758 | let e = _mm512_set_epi8( |
2759 | 100, 32, 100, 33, 100, 34, 100, 35, 100, 36, 100, 37, 100, 38, 100, 39, |
2760 | 100, 40, 100, 41, 100, 42, 100, 43, 100, 44, 100, 45, 100, 46, 100, 47, |
2761 | 100, 48, 100, 49, 100, 50, 100, 51, 100, 52, 100, 53, 100, 54, 100, 55, |
2762 | 100, 56, 100, 57, 100, 58, 100, 59, 100, 60, 100, 61, 100, 62, 100, 63, |
2763 | ); |
2764 | assert_eq_m512i(r, e); |
2765 | } |
2766 | |
2767 | #[simd_test(enable = "avx512vbmi2" )] |
2768 | unsafe fn test_mm512_maskz_expand_epi8() { |
2769 | #[rustfmt::skip] |
2770 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2771 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
2772 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
2773 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
2774 | let r = _mm512_maskz_expand_epi8( |
2775 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101, |
2776 | a, |
2777 | ); |
2778 | #[rustfmt::skip] |
2779 | let e = _mm512_set_epi8( |
2780 | 0, 32, 0, 33, 0, 34, 0, 35, 0, 36, 0, 37, 0, 38, 0, 39, |
2781 | 0, 40, 0, 41, 0, 42, 0, 43, 0, 44, 0, 45, 0, 46, 0, 47, |
2782 | 0, 48, 0, 49, 0, 50, 0, 51, 0, 52, 0, 53, 0, 54, 0, 55, |
2783 | 0, 56, 0, 57, 0, 58, 0, 59, 0, 60, 0, 61, 0, 62, 0, 63, |
2784 | ); |
2785 | assert_eq_m512i(r, e); |
2786 | } |
2787 | |
2788 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2789 | unsafe fn test_mm256_mask_expand_epi8() { |
2790 | let src = _mm256_set1_epi8(100); |
2791 | #[rustfmt::skip] |
2792 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2793 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
2794 | let r = _mm256_mask_expand_epi8(src, 0b01010101_01010101_01010101_01010101, a); |
2795 | #[rustfmt::skip] |
2796 | let e = _mm256_set_epi8( |
2797 | 100, 16, 100, 17, 100, 18, 100, 19, 100, 20, 100, 21, 100, 22, 100, 23, |
2798 | 100, 24, 100, 25, 100, 26, 100, 27, 100, 28, 100, 29, 100, 30, 100, 31, |
2799 | ); |
2800 | assert_eq_m256i(r, e); |
2801 | } |
2802 | |
2803 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2804 | unsafe fn test_mm256_maskz_expand_epi8() { |
2805 | #[rustfmt::skip] |
2806 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2807 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
2808 | let r = _mm256_maskz_expand_epi8(0b01010101_01010101_01010101_01010101, a); |
2809 | #[rustfmt::skip] |
2810 | let e = _mm256_set_epi8( |
2811 | 0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23, |
2812 | 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31, |
2813 | ); |
2814 | assert_eq_m256i(r, e); |
2815 | } |
2816 | |
2817 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2818 | unsafe fn test_mm_mask_expand_epi8() { |
2819 | let src = _mm_set1_epi8(100); |
2820 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
2821 | let r = _mm_mask_expand_epi8(src, 0b01010101_01010101, a); |
2822 | let e = _mm_set_epi8( |
2823 | 100, 8, 100, 9, 100, 10, 100, 11, 100, 12, 100, 13, 100, 14, 100, 15, |
2824 | ); |
2825 | assert_eq_m128i(r, e); |
2826 | } |
2827 | |
2828 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2829 | unsafe fn test_mm_maskz_expand_epi8() { |
2830 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
2831 | let r = _mm_maskz_expand_epi8(0b01010101_01010101, a); |
2832 | let e = _mm_set_epi8(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15); |
2833 | assert_eq_m128i(r, e); |
2834 | } |
2835 | |
2836 | #[simd_test(enable = "avx512vbmi2" )] |
2837 | unsafe fn test_mm512_shldv_epi64() { |
2838 | let a = _mm512_set1_epi64(1); |
2839 | let b = _mm512_set1_epi64(1 << 63); |
2840 | let c = _mm512_set1_epi64(2); |
2841 | let r = _mm512_shldv_epi64(a, b, c); |
2842 | let e = _mm512_set1_epi64(6); |
2843 | assert_eq_m512i(r, e); |
2844 | } |
2845 | |
2846 | #[simd_test(enable = "avx512vbmi2" )] |
2847 | unsafe fn test_mm512_mask_shldv_epi64() { |
2848 | let a = _mm512_set1_epi64(1); |
2849 | let b = _mm512_set1_epi64(1 << 63); |
2850 | let c = _mm512_set1_epi64(2); |
2851 | let r = _mm512_mask_shldv_epi64(a, 0, b, c); |
2852 | assert_eq_m512i(r, a); |
2853 | let r = _mm512_mask_shldv_epi64(a, 0b11111111, b, c); |
2854 | let e = _mm512_set1_epi64(6); |
2855 | assert_eq_m512i(r, e); |
2856 | } |
2857 | |
2858 | #[simd_test(enable = "avx512vbmi2" )] |
2859 | unsafe fn test_mm512_maskz_shldv_epi64() { |
2860 | let a = _mm512_set1_epi64(1); |
2861 | let b = _mm512_set1_epi64(1 << 63); |
2862 | let c = _mm512_set1_epi64(2); |
2863 | let r = _mm512_maskz_shldv_epi64(0, a, b, c); |
2864 | assert_eq_m512i(r, _mm512_setzero_si512()); |
2865 | let r = _mm512_maskz_shldv_epi64(0b11111111, a, b, c); |
2866 | let e = _mm512_set1_epi64(6); |
2867 | assert_eq_m512i(r, e); |
2868 | } |
2869 | |
2870 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2871 | unsafe fn test_mm256_shldv_epi64() { |
2872 | let a = _mm256_set1_epi64x(1); |
2873 | let b = _mm256_set1_epi64x(1 << 63); |
2874 | let c = _mm256_set1_epi64x(2); |
2875 | let r = _mm256_shldv_epi64(a, b, c); |
2876 | let e = _mm256_set1_epi64x(6); |
2877 | assert_eq_m256i(r, e); |
2878 | } |
2879 | |
2880 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2881 | unsafe fn test_mm256_mask_shldv_epi64() { |
2882 | let a = _mm256_set1_epi64x(1); |
2883 | let b = _mm256_set1_epi64x(1 << 63); |
2884 | let c = _mm256_set1_epi64x(2); |
2885 | let r = _mm256_mask_shldv_epi64(a, 0, b, c); |
2886 | assert_eq_m256i(r, a); |
2887 | let r = _mm256_mask_shldv_epi64(a, 0b00001111, b, c); |
2888 | let e = _mm256_set1_epi64x(6); |
2889 | assert_eq_m256i(r, e); |
2890 | } |
2891 | |
2892 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2893 | unsafe fn test_mm256_maskz_shldv_epi64() { |
2894 | let a = _mm256_set1_epi64x(1); |
2895 | let b = _mm256_set1_epi64x(1 << 63); |
2896 | let c = _mm256_set1_epi64x(2); |
2897 | let r = _mm256_maskz_shldv_epi64(0, a, b, c); |
2898 | assert_eq_m256i(r, _mm256_setzero_si256()); |
2899 | let r = _mm256_maskz_shldv_epi64(0b00001111, a, b, c); |
2900 | let e = _mm256_set1_epi64x(6); |
2901 | assert_eq_m256i(r, e); |
2902 | } |
2903 | |
2904 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2905 | unsafe fn test_mm_shldv_epi64() { |
2906 | let a = _mm_set1_epi64x(1); |
2907 | let b = _mm_set1_epi64x(1 << 63); |
2908 | let c = _mm_set1_epi64x(2); |
2909 | let r = _mm_shldv_epi64(a, b, c); |
2910 | let e = _mm_set1_epi64x(6); |
2911 | assert_eq_m128i(r, e); |
2912 | } |
2913 | |
2914 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2915 | unsafe fn test_mm_mask_shldv_epi64() { |
2916 | let a = _mm_set1_epi64x(1); |
2917 | let b = _mm_set1_epi64x(1 << 63); |
2918 | let c = _mm_set1_epi64x(2); |
2919 | let r = _mm_mask_shldv_epi64(a, 0, b, c); |
2920 | assert_eq_m128i(r, a); |
2921 | let r = _mm_mask_shldv_epi64(a, 0b00000011, b, c); |
2922 | let e = _mm_set1_epi64x(6); |
2923 | assert_eq_m128i(r, e); |
2924 | } |
2925 | |
2926 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2927 | unsafe fn test_mm_maskz_shldv_epi64() { |
2928 | let a = _mm_set1_epi64x(1); |
2929 | let b = _mm_set1_epi64x(1 << 63); |
2930 | let c = _mm_set1_epi64x(2); |
2931 | let r = _mm_maskz_shldv_epi64(0, a, b, c); |
2932 | assert_eq_m128i(r, _mm_setzero_si128()); |
2933 | let r = _mm_maskz_shldv_epi64(0b00000011, a, b, c); |
2934 | let e = _mm_set1_epi64x(6); |
2935 | assert_eq_m128i(r, e); |
2936 | } |
2937 | |
2938 | #[simd_test(enable = "avx512vbmi2" )] |
2939 | unsafe fn test_mm512_shldv_epi32() { |
2940 | let a = _mm512_set1_epi32(1); |
2941 | let b = _mm512_set1_epi32(1 << 31); |
2942 | let c = _mm512_set1_epi32(2); |
2943 | let r = _mm512_shldv_epi32(a, b, c); |
2944 | let e = _mm512_set1_epi32(6); |
2945 | assert_eq_m512i(r, e); |
2946 | } |
2947 | |
2948 | #[simd_test(enable = "avx512vbmi2" )] |
2949 | unsafe fn test_mm512_mask_shldv_epi32() { |
2950 | let a = _mm512_set1_epi32(1); |
2951 | let b = _mm512_set1_epi32(1 << 31); |
2952 | let c = _mm512_set1_epi32(2); |
2953 | let r = _mm512_mask_shldv_epi32(a, 0, b, c); |
2954 | assert_eq_m512i(r, a); |
2955 | let r = _mm512_mask_shldv_epi32(a, 0b11111111_11111111, b, c); |
2956 | let e = _mm512_set1_epi32(6); |
2957 | assert_eq_m512i(r, e); |
2958 | } |
2959 | |
2960 | #[simd_test(enable = "avx512vbmi2" )] |
2961 | unsafe fn test_mm512_maskz_shldv_epi32() { |
2962 | let a = _mm512_set1_epi32(1); |
2963 | let b = _mm512_set1_epi32(1 << 31); |
2964 | let c = _mm512_set1_epi32(2); |
2965 | let r = _mm512_maskz_shldv_epi32(0, a, b, c); |
2966 | assert_eq_m512i(r, _mm512_setzero_si512()); |
2967 | let r = _mm512_maskz_shldv_epi32(0b11111111_11111111, a, b, c); |
2968 | let e = _mm512_set1_epi32(6); |
2969 | assert_eq_m512i(r, e); |
2970 | } |
2971 | |
2972 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2973 | unsafe fn test_mm256_shldv_epi32() { |
2974 | let a = _mm256_set1_epi32(1); |
2975 | let b = _mm256_set1_epi32(1 << 31); |
2976 | let c = _mm256_set1_epi32(2); |
2977 | let r = _mm256_shldv_epi32(a, b, c); |
2978 | let e = _mm256_set1_epi32(6); |
2979 | assert_eq_m256i(r, e); |
2980 | } |
2981 | |
2982 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2983 | unsafe fn test_mm256_mask_shldv_epi32() { |
2984 | let a = _mm256_set1_epi32(1); |
2985 | let b = _mm256_set1_epi32(1 << 31); |
2986 | let c = _mm256_set1_epi32(2); |
2987 | let r = _mm256_mask_shldv_epi32(a, 0, b, c); |
2988 | assert_eq_m256i(r, a); |
2989 | let r = _mm256_mask_shldv_epi32(a, 0b11111111, b, c); |
2990 | let e = _mm256_set1_epi32(6); |
2991 | assert_eq_m256i(r, e); |
2992 | } |
2993 | |
2994 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2995 | unsafe fn test_mm256_maskz_shldv_epi32() { |
2996 | let a = _mm256_set1_epi32(1); |
2997 | let b = _mm256_set1_epi32(1 << 31); |
2998 | let c = _mm256_set1_epi32(2); |
2999 | let r = _mm256_maskz_shldv_epi32(0, a, b, c); |
3000 | assert_eq_m256i(r, _mm256_setzero_si256()); |
3001 | let r = _mm256_maskz_shldv_epi32(0b11111111, a, b, c); |
3002 | let e = _mm256_set1_epi32(6); |
3003 | assert_eq_m256i(r, e); |
3004 | } |
3005 | |
3006 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3007 | unsafe fn test_mm_shldv_epi32() { |
3008 | let a = _mm_set1_epi32(1); |
3009 | let b = _mm_set1_epi32(1 << 31); |
3010 | let c = _mm_set1_epi32(2); |
3011 | let r = _mm_shldv_epi32(a, b, c); |
3012 | let e = _mm_set1_epi32(6); |
3013 | assert_eq_m128i(r, e); |
3014 | } |
3015 | |
3016 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3017 | unsafe fn test_mm_mask_shldv_epi32() { |
3018 | let a = _mm_set1_epi32(1); |
3019 | let b = _mm_set1_epi32(1 << 31); |
3020 | let c = _mm_set1_epi32(2); |
3021 | let r = _mm_mask_shldv_epi32(a, 0, b, c); |
3022 | assert_eq_m128i(r, a); |
3023 | let r = _mm_mask_shldv_epi32(a, 0b00001111, b, c); |
3024 | let e = _mm_set1_epi32(6); |
3025 | assert_eq_m128i(r, e); |
3026 | } |
3027 | |
3028 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3029 | unsafe fn test_mm_maskz_shldv_epi32() { |
3030 | let a = _mm_set1_epi32(1); |
3031 | let b = _mm_set1_epi32(1 << 31); |
3032 | let c = _mm_set1_epi32(2); |
3033 | let r = _mm_maskz_shldv_epi32(0, a, b, c); |
3034 | assert_eq_m128i(r, _mm_setzero_si128()); |
3035 | let r = _mm_maskz_shldv_epi32(0b00001111, a, b, c); |
3036 | let e = _mm_set1_epi32(6); |
3037 | assert_eq_m128i(r, e); |
3038 | } |
3039 | |
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shldv_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let c = _mm512_set1_epi16(2);
        let r = _mm512_shldv_epi16(a, b, c);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldv_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let c = _mm512_set1_epi16(2);
        let r = _mm512_mask_shldv_epi16(a, 0, b, c);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldv_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let c = _mm512_set1_epi16(2);
        let r = _mm512_maskz_shldv_epi16(0, a, b, c);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldv_epi16(0b11111111_11111111_11111111_11111111, a, b, c);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldv_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let c = _mm256_set1_epi16(2);
        let r = _mm256_shldv_epi16(a, b, c);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldv_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let c = _mm256_set1_epi16(2);
        let r = _mm256_mask_shldv_epi16(a, 0, b, c);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldv_epi16(a, 0b11111111_11111111, b, c);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldv_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let c = _mm256_set1_epi16(2);
        let r = _mm256_maskz_shldv_epi16(0, a, b, c);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldv_epi16(0b11111111_11111111, a, b, c);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldv_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let c = _mm_set1_epi16(2);
        let r = _mm_shldv_epi16(a, b, c);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldv_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let c = _mm_set1_epi16(2);
        let r = _mm_mask_shldv_epi16(a, 0, b, c);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldv_epi16(a, 0b11111111, b, c);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldv_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let c = _mm_set1_epi16(2);
        let r = _mm_maskz_shldv_epi16(0, a, b, c);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldv_epi16(0b11111111, a, b, c);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

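    // The shrdv tests cover the variable-count funnel shift right intrinsics. With every lane of
    // `a` set to 8, `b` set to 2, and a per-lane count of 1, each result lane is expected to be 1;
    // the mask variants additionally check that a zero mask keeps `a` (mask) or zeroes dst (maskz).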
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdv_epi64() {
        let a = _mm512_set1_epi64(8);
        let b = _mm512_set1_epi64(2);
        let c = _mm512_set1_epi64(1);
        let r = _mm512_shrdv_epi64(a, b, c);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdv_epi64() {
        let a = _mm512_set1_epi64(8);
        let b = _mm512_set1_epi64(2);
        let c = _mm512_set1_epi64(1);
        let r = _mm512_mask_shrdv_epi64(a, 0, b, c);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdv_epi64(a, 0b11111111, b, c);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdv_epi64() {
        let a = _mm512_set1_epi64(8);
        let b = _mm512_set1_epi64(2);
        let c = _mm512_set1_epi64(1);
        let r = _mm512_maskz_shrdv_epi64(0, a, b, c);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdv_epi64(0b11111111, a, b, c);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdv_epi64() {
        let a = _mm256_set1_epi64x(8);
        let b = _mm256_set1_epi64x(2);
        let c = _mm256_set1_epi64x(1);
        let r = _mm256_shrdv_epi64(a, b, c);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdv_epi64() {
        let a = _mm256_set1_epi64x(8);
        let b = _mm256_set1_epi64x(2);
        let c = _mm256_set1_epi64x(1);
        let r = _mm256_mask_shrdv_epi64(a, 0, b, c);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdv_epi64(a, 0b00001111, b, c);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdv_epi64() {
        let a = _mm256_set1_epi64x(8);
        let b = _mm256_set1_epi64x(2);
        let c = _mm256_set1_epi64x(1);
        let r = _mm256_maskz_shrdv_epi64(0, a, b, c);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdv_epi64(0b00001111, a, b, c);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdv_epi64() {
        let a = _mm_set1_epi64x(8);
        let b = _mm_set1_epi64x(2);
        let c = _mm_set1_epi64x(1);
        let r = _mm_shrdv_epi64(a, b, c);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdv_epi64() {
        let a = _mm_set1_epi64x(8);
        let b = _mm_set1_epi64x(2);
        let c = _mm_set1_epi64x(1);
        let r = _mm_mask_shrdv_epi64(a, 0, b, c);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdv_epi64(a, 0b00000011, b, c);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdv_epi64() {
        let a = _mm_set1_epi64x(8);
        let b = _mm_set1_epi64x(2);
        let c = _mm_set1_epi64x(1);
        let r = _mm_maskz_shrdv_epi64(0, a, b, c);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdv_epi64(0b00000011, a, b, c);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdv_epi32() {
        let a = _mm512_set1_epi32(8);
        let b = _mm512_set1_epi32(2);
        let c = _mm512_set1_epi32(1);
        let r = _mm512_shrdv_epi32(a, b, c);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdv_epi32() {
        let a = _mm512_set1_epi32(8);
        let b = _mm512_set1_epi32(2);
        let c = _mm512_set1_epi32(1);
        let r = _mm512_mask_shrdv_epi32(a, 0, b, c);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdv_epi32(a, 0b11111111_11111111, b, c);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdv_epi32() {
        let a = _mm512_set1_epi32(8);
        let b = _mm512_set1_epi32(2);
        let c = _mm512_set1_epi32(1);
        let r = _mm512_maskz_shrdv_epi32(0, a, b, c);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdv_epi32(0b11111111_11111111, a, b, c);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdv_epi32() {
        let a = _mm256_set1_epi32(8);
        let b = _mm256_set1_epi32(2);
        let c = _mm256_set1_epi32(1);
        let r = _mm256_shrdv_epi32(a, b, c);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdv_epi32() {
        let a = _mm256_set1_epi32(8);
        let b = _mm256_set1_epi32(2);
        let c = _mm256_set1_epi32(1);
        let r = _mm256_mask_shrdv_epi32(a, 0, b, c);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdv_epi32(a, 0b11111111, b, c);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdv_epi32() {
        let a = _mm256_set1_epi32(8);
        let b = _mm256_set1_epi32(2);
        let c = _mm256_set1_epi32(1);
        let r = _mm256_maskz_shrdv_epi32(0, a, b, c);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdv_epi32(0b11111111, a, b, c);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdv_epi32() {
        let a = _mm_set1_epi32(8);
        let b = _mm_set1_epi32(2);
        let c = _mm_set1_epi32(1);
        let r = _mm_shrdv_epi32(a, b, c);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdv_epi32() {
        let a = _mm_set1_epi32(8);
        let b = _mm_set1_epi32(2);
        let c = _mm_set1_epi32(1);
        let r = _mm_mask_shrdv_epi32(a, 0, b, c);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdv_epi32(a, 0b00001111, b, c);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdv_epi32() {
        let a = _mm_set1_epi32(8);
        let b = _mm_set1_epi32(2);
        let c = _mm_set1_epi32(1);
        let r = _mm_maskz_shrdv_epi32(0, a, b, c);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdv_epi32(0b00001111, a, b, c);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdv_epi16() {
        let a = _mm512_set1_epi16(8);
        let b = _mm512_set1_epi16(2);
        let c = _mm512_set1_epi16(1);
        let r = _mm512_shrdv_epi16(a, b, c);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdv_epi16() {
        let a = _mm512_set1_epi16(8);
        let b = _mm512_set1_epi16(2);
        let c = _mm512_set1_epi16(1);
        let r = _mm512_mask_shrdv_epi16(a, 0, b, c);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdv_epi16() {
        let a = _mm512_set1_epi16(8);
        let b = _mm512_set1_epi16(2);
        let c = _mm512_set1_epi16(1);
        let r = _mm512_maskz_shrdv_epi16(0, a, b, c);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdv_epi16(0b11111111_11111111_11111111_11111111, a, b, c);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdv_epi16() {
        let a = _mm256_set1_epi16(8);
        let b = _mm256_set1_epi16(2);
        let c = _mm256_set1_epi16(1);
        let r = _mm256_shrdv_epi16(a, b, c);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdv_epi16() {
        let a = _mm256_set1_epi16(8);
        let b = _mm256_set1_epi16(2);
        let c = _mm256_set1_epi16(1);
        let r = _mm256_mask_shrdv_epi16(a, 0, b, c);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdv_epi16(a, 0b11111111_11111111, b, c);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdv_epi16() {
        let a = _mm256_set1_epi16(8);
        let b = _mm256_set1_epi16(2);
        let c = _mm256_set1_epi16(1);
        let r = _mm256_maskz_shrdv_epi16(0, a, b, c);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdv_epi16(0b11111111_11111111, a, b, c);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdv_epi16() {
        let a = _mm_set1_epi16(8);
        let b = _mm_set1_epi16(2);
        let c = _mm_set1_epi16(1);
        let r = _mm_shrdv_epi16(a, b, c);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdv_epi16() {
        let a = _mm_set1_epi16(8);
        let b = _mm_set1_epi16(2);
        let c = _mm_set1_epi16(1);
        let r = _mm_mask_shrdv_epi16(a, 0, b, c);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdv_epi16(a, 0b11111111, b, c);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdv_epi16() {
        let a = _mm_set1_epi16(8);
        let b = _mm_set1_epi16(2);
        let c = _mm_set1_epi16(1);
        let r = _mm_maskz_shrdv_epi16(0, a, b, c);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdv_epi16(0b11111111, a, b, c);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

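    // shldi is the immediate-count form of the funnel shift left: the count is a const generic,
    // so `::<2>` here plays the role of the `c` vector in the shldv tests above and the expected
    // lane value is again 6.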
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shldi_epi64() {
        let a = _mm512_set1_epi64(1);
        let b = _mm512_set1_epi64(1 << 63);
        let r = _mm512_shldi_epi64::<2>(a, b);
        let e = _mm512_set1_epi64(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldi_epi64() {
        let a = _mm512_set1_epi64(1);
        let b = _mm512_set1_epi64(1 << 63);
        let r = _mm512_mask_shldi_epi64::<2>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldi_epi64::<2>(a, 0b11111111, a, b);
        let e = _mm512_set1_epi64(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldi_epi64() {
        let a = _mm512_set1_epi64(1);
        let b = _mm512_set1_epi64(1 << 63);
        let r = _mm512_maskz_shldi_epi64::<2>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldi_epi64::<2>(0b11111111, a, b);
        let e = _mm512_set1_epi64(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldi_epi64() {
        let a = _mm256_set1_epi64x(1);
        let b = _mm256_set1_epi64x(1 << 63);
        let r = _mm256_shldi_epi64::<2>(a, b);
        let e = _mm256_set1_epi64x(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldi_epi64() {
        let a = _mm256_set1_epi64x(1);
        let b = _mm256_set1_epi64x(1 << 63);
        let r = _mm256_mask_shldi_epi64::<2>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldi_epi64::<2>(a, 0b00001111, a, b);
        let e = _mm256_set1_epi64x(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldi_epi64() {
        let a = _mm256_set1_epi64x(1);
        let b = _mm256_set1_epi64x(1 << 63);
        let r = _mm256_maskz_shldi_epi64::<2>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldi_epi64::<2>(0b00001111, a, b);
        let e = _mm256_set1_epi64x(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldi_epi64() {
        let a = _mm_set1_epi64x(1);
        let b = _mm_set1_epi64x(1 << 63);
        let r = _mm_shldi_epi64::<2>(a, b);
        let e = _mm_set1_epi64x(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldi_epi64() {
        let a = _mm_set1_epi64x(1);
        let b = _mm_set1_epi64x(1 << 63);
        let r = _mm_mask_shldi_epi64::<2>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldi_epi64::<2>(a, 0b00000011, a, b);
        let e = _mm_set1_epi64x(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldi_epi64() {
        let a = _mm_set1_epi64x(1);
        let b = _mm_set1_epi64x(1 << 63);
        let r = _mm_maskz_shldi_epi64::<2>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldi_epi64::<2>(0b00000011, a, b);
        let e = _mm_set1_epi64x(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shldi_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let r = _mm512_shldi_epi32::<2>(a, b);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldi_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let r = _mm512_mask_shldi_epi32::<2>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldi_epi32::<2>(a, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldi_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let r = _mm512_maskz_shldi_epi32::<2>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldi_epi32::<2>(0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldi_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let r = _mm256_shldi_epi32::<2>(a, b);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldi_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let r = _mm256_mask_shldi_epi32::<2>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldi_epi32::<2>(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldi_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let r = _mm256_maskz_shldi_epi32::<2>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldi_epi32::<2>(0b11111111, a, b);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldi_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let r = _mm_shldi_epi32::<2>(a, b);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldi_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let r = _mm_mask_shldi_epi32::<2>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldi_epi32::<2>(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldi_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let r = _mm_maskz_shldi_epi32::<2>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldi_epi32::<2>(0b00001111, a, b);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shldi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let r = _mm512_shldi_epi16::<2>(a, b);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let r = _mm512_mask_shldi_epi16::<2>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldi_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let r = _mm512_maskz_shldi_epi16::<2>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldi_epi16::<2>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let r = _mm256_shldi_epi16::<2>(a, b);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let r = _mm256_mask_shldi_epi16::<2>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldi_epi16::<2>(a, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let r = _mm256_maskz_shldi_epi16::<2>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldi_epi16::<2>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let r = _mm_shldi_epi16::<2>(a, b);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let r = _mm_mask_shldi_epi16::<2>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldi_epi16::<2>(a, 0b11111111, a, b);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let r = _mm_maskz_shldi_epi16::<2>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldi_epi16::<2>(0b11111111, a, b);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

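    // shrdi mirrors shrdv with an immediate count: the same (8, 2) inputs shifted right by one
    // are expected to leave 1 in every lane, for all three element widths.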
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdi_epi64() {
        let a = _mm512_set1_epi64(8);
        let b = _mm512_set1_epi64(2);
        let r = _mm512_shrdi_epi64::<1>(a, b);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdi_epi64() {
        let a = _mm512_set1_epi64(8);
        let b = _mm512_set1_epi64(2);
        let r = _mm512_mask_shrdi_epi64::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdi_epi64::<1>(a, 0b11111111, a, b);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdi_epi64() {
        let a = _mm512_set1_epi64(8);
        let b = _mm512_set1_epi64(2);
        let r = _mm512_maskz_shrdi_epi64::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdi_epi64::<1>(0b11111111, a, b);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdi_epi64() {
        let a = _mm256_set1_epi64x(8);
        let b = _mm256_set1_epi64x(2);
        let r = _mm256_shrdi_epi64::<1>(a, b);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdi_epi64() {
        let a = _mm256_set1_epi64x(8);
        let b = _mm256_set1_epi64x(2);
        let r = _mm256_mask_shrdi_epi64::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdi_epi64::<1>(a, 0b00001111, a, b);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdi_epi64() {
        let a = _mm256_set1_epi64x(8);
        let b = _mm256_set1_epi64x(2);
        let r = _mm256_maskz_shrdi_epi64::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdi_epi64::<1>(0b00001111, a, b);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdi_epi64() {
        let a = _mm_set1_epi64x(8);
        let b = _mm_set1_epi64x(2);
        let r = _mm_shrdi_epi64::<1>(a, b);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdi_epi64() {
        let a = _mm_set1_epi64x(8);
        let b = _mm_set1_epi64x(2);
        let r = _mm_mask_shrdi_epi64::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdi_epi64::<1>(a, 0b00000011, a, b);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdi_epi64() {
        let a = _mm_set1_epi64x(8);
        let b = _mm_set1_epi64x(2);
        let r = _mm_maskz_shrdi_epi64::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdi_epi64::<1>(0b00000011, a, b);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdi_epi32() {
        let a = _mm512_set1_epi32(8);
        let b = _mm512_set1_epi32(2);
        let r = _mm512_shrdi_epi32::<1>(a, b);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdi_epi32() {
        let a = _mm512_set1_epi32(8);
        let b = _mm512_set1_epi32(2);
        let r = _mm512_mask_shrdi_epi32::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdi_epi32::<1>(a, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdi_epi32() {
        let a = _mm512_set1_epi32(8);
        let b = _mm512_set1_epi32(2);
        let r = _mm512_maskz_shrdi_epi32::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdi_epi32::<1>(0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdi_epi32() {
        let a = _mm256_set1_epi32(8);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_shrdi_epi32::<1>(a, b);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdi_epi32() {
        let a = _mm256_set1_epi32(8);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_mask_shrdi_epi32::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdi_epi32::<1>(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdi_epi32() {
        let a = _mm256_set1_epi32(8);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_maskz_shrdi_epi32::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdi_epi32::<1>(0b11111111, a, b);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdi_epi32() {
        let a = _mm_set1_epi32(8);
        let b = _mm_set1_epi32(2);
        let r = _mm_shrdi_epi32::<1>(a, b);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdi_epi32() {
        let a = _mm_set1_epi32(8);
        let b = _mm_set1_epi32(2);
        let r = _mm_mask_shrdi_epi32::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdi_epi32::<1>(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdi_epi32() {
        let a = _mm_set1_epi32(8);
        let b = _mm_set1_epi32(2);
        let r = _mm_maskz_shrdi_epi32::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdi_epi32::<1>(0b00001111, a, b);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdi_epi16() {
        let a = _mm512_set1_epi16(8);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_shrdi_epi16::<1>(a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdi_epi16() {
        let a = _mm512_set1_epi16(8);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_shrdi_epi16::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdi_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdi_epi16() {
        let a = _mm512_set1_epi16(8);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_maskz_shrdi_epi16::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdi_epi16::<1>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdi_epi16() {
        let a = _mm256_set1_epi16(8);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_shrdi_epi16::<1>(a, b);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdi_epi16() {
        let a = _mm256_set1_epi16(8);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_shrdi_epi16::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdi_epi16::<1>(a, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdi_epi16() {
        let a = _mm256_set1_epi16(8);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_maskz_shrdi_epi16::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdi_epi16::<1>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdi_epi16() {
        let a = _mm_set1_epi16(8);
        let b = _mm_set1_epi16(2);
        let r = _mm_shrdi_epi16::<1>(a, b);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdi_epi16() {
        let a = _mm_set1_epi16(8);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_shrdi_epi16::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdi_epi16::<1>(a, 0b11111111, a, b);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdi_epi16() {
        let a = _mm_set1_epi16(8);
        let b = _mm_set1_epi16(2);
        let r = _mm_maskz_shrdi_epi16::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdi_epi16::<1>(0b11111111, a, b);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

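    // Expand-load tests: consecutive values read from memory land only in the lanes whose mask
    // bit is set; the remaining lanes keep `src` (mask variants) or are zeroed (maskz variants).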
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_expandloadu_epi16() {
        let src = _mm512_set1_epi16(42);
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm512_mask_expandloadu_epi16(src, m, black_box(p));
        let e = _mm512_set_epi16(
            16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
            42, 42, 42, 42, 42, 4, 3, 2, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_expandloadu_epi16() {
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm512_maskz_expandloadu_epi16(m, black_box(p));
        let e = _mm512_set_epi16(
            16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
            0, 4, 3, 2, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi16() {
        let src = _mm256_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_mask_expandloadu_epi16(src, m, black_box(p));
        let e = _mm256_set_epi16(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_maskz_expandloadu_epi16(m, black_box(p));
        let e = _mm256_set_epi16(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi16() {
        let src = _mm_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_epi16(src, m, black_box(p));
        let e = _mm_set_epi16(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_epi16(m, black_box(p));
        let e = _mm_set_epi16(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_expandloadu_epi8() {
        let src = _mm512_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
        let r = _mm512_mask_expandloadu_epi8(src, m, black_box(p));
        let e = _mm512_set_epi8(
            32, 31, 30, 42, 29, 42, 42, 42, 28, 27, 42, 42, 26, 42, 25, 42, 24, 23, 22, 21, 42, 42,
            42, 42, 42, 42, 42, 42, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 42, 42, 42, 42,
            42, 42, 42, 42, 8, 42, 7, 42, 6, 42, 5, 42, 42, 4, 42, 3, 42, 2, 42, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_expandloadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
        let r = _mm512_maskz_expandloadu_epi8(m, black_box(p));
        let e = _mm512_set_epi8(
            32, 31, 30, 0, 29, 0, 0, 0, 28, 27, 0, 0, 26, 0, 25, 0, 24, 23, 22, 21, 0, 0, 0, 0, 0,
            0, 0, 0, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0,
            7, 0, 6, 0, 5, 0, 0, 4, 0, 3, 0, 2, 0, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi8() {
        let src = _mm256_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm256_mask_expandloadu_epi8(src, m, black_box(p));
        let e = _mm256_set_epi8(
            16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
            42, 42, 42, 42, 42, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm256_maskz_expandloadu_epi8(m, black_box(p));
        let e = _mm256_set_epi8(
            16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
            0, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi8() {
        let src = _mm_set1_epi8(42);
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_mask_expandloadu_epi8(src, m, black_box(p));
        let e = _mm_set_epi8(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi8() {
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_maskz_expandloadu_epi8(m, black_box(p));
        let e = _mm_set_epi8(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
        assert_eq_m128i(r, e);
    }

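    // Compress-store tests: the lanes selected by the mask are written contiguously to the front
    // of the destination buffer; a zero mask writes nothing, so the buffer stays all zeros.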
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_compressstoreu_epi16() {
        let a = _mm512_set_epi16(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
            10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        );
        let mut r = [0_i16; 32];
        _mm512_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i16; 32]);
        _mm512_mask_compressstoreu_epi16(
            r.as_mut_ptr() as *mut _,
            0b11110000_11001010_11111111_00000000,
            a,
        );
        assert_eq!(
            &r,
            &[
                9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0
            ]
        );
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_compressstoreu_epi16() {
        let a = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let mut r = [0_i16; 16];
        _mm256_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i16; 16]);
        _mm256_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0b11110000_11001010, a);
        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_compressstoreu_epi16() {
        let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
        let mut r = [0_i16; 8];
        _mm_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i16; 8]);
        _mm_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0b11110000, a);
        assert_eq!(&r, &[5, 6, 7, 8, 0, 0, 0, 0]);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_compressstoreu_epi8() {
        let a = _mm512_set_epi8(
            64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43,
            42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21,
            20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        );
        let mut r = [0_i8; 64];
        _mm512_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i8; 64]);
        _mm512_mask_compressstoreu_epi8(
            r.as_mut_ptr() as *mut _,
            0b11110000_11001010_11111111_00000000_10101010_01010101_11110000_00001111,
            a,
        );
        assert_eq!(
            &r,
            &[
                1, 2, 3, 4, 13, 14, 15, 16, 17, 19, 21, 23, 26, 28, 30, 32, 41, 42, 43, 44, 45, 46,
                47, 48, 50, 52, 55, 56, 61, 62, 63, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
            ]
        );
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_compressstoreu_epi8() {
        let a = _mm256_set_epi8(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
            10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        );
        let mut r = [0_i8; 32];
        _mm256_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i8; 32]);
        _mm256_mask_compressstoreu_epi8(
            r.as_mut_ptr() as *mut _,
            0b11110000_11001010_11111111_00000000,
            a,
        );
        assert_eq!(
            &r,
            &[
                9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0
            ]
        );
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_compressstoreu_epi8() {
        let a = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let mut r = [0_i8; 16];
        _mm_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i8; 16]);
        _mm_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0b11110000_11001010, a);
        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
    }
}