1 | use crate::{ |
2 | core_arch::{simd::*, x86::*}, |
3 | intrinsics::simd::*, |
4 | }; |
5 | |
#[cfg(test)]
7 | use stdarch_test::assert_instr; |
8 | |
9 | /// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
10 | /// |
11 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi16) |
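///
/// # Examples
///
/// A minimal usage sketch (illustrative only; it assumes the running CPU actually
/// supports `avx512vbmi2`, and the buffer contents and mask value are arbitrary):
///
/// ```ignore
/// let data: [i16; 32] = core::array::from_fn(|i| i as i16);
/// let src = _mm512_set1_epi16(-1);
/// // Mask bits 0, 1 and 4 are set, so lanes 0, 1 and 4 receive data[0], data[1]
/// // and data[2] (the first three contiguous elements); every other lane keeps
/// // the value from `src`.
/// let r = unsafe { _mm512_mask_expandloadu_epi16(src, 0b1_0011, data.as_ptr()) };
/// ```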
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_expandloadu_epi16(
    src: __m512i,
    k: __mmask32,
    mem_addr: *const i16,
) -> __m512i {
    transmute(expandloadw_512(mem_addr, src.as_i16x32(), k))
}
23 | |
24 | /// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
25 | /// |
26 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi16) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_expandloadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
    _mm512_mask_expandloadu_epi16(_mm512_setzero_si512(), k, mem_addr)
}
34 | |
35 | /// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
36 | /// |
37 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi16) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_expandloadu_epi16(
    src: __m256i,
    k: __mmask16,
    mem_addr: *const i16,
) -> __m256i {
    transmute(expandloadw_256(mem_addr, src.as_i16x16(), k))
}
49 | |
50 | /// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
51 | /// |
52 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi16) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_expandloadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
    _mm256_mask_expandloadu_epi16(_mm256_setzero_si256(), k, mem_addr)
}
60 | |
61 | /// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
62 | /// |
63 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi16) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_expandloadu_epi16(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i16,
) -> __m128i {
    transmute(expandloadw_128(mem_addr, src.as_i16x8(), k))
}
75 | |
76 | /// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
77 | /// |
78 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi16) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_expandloadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
    _mm_mask_expandloadu_epi16(_mm_setzero_si128(), k, mem_addr)
}
86 | |
87 | /// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
88 | /// |
89 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi8) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_expandloadu_epi8(
    src: __m512i,
    k: __mmask64,
    mem_addr: *const i8,
) -> __m512i {
    transmute(expandloadb_512(mem_addr, src.as_i8x64(), k))
}
101 | |
102 | /// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
103 | /// |
104 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi8) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_expandloadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
    _mm512_mask_expandloadu_epi8(_mm512_setzero_si512(), k, mem_addr)
}
112 | |
113 | /// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
114 | /// |
115 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi8) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_expandloadu_epi8(
    src: __m256i,
    k: __mmask32,
    mem_addr: *const i8,
) -> __m256i {
    transmute(expandloadb_256(mem_addr, src.as_i8x32(), k))
}
127 | |
128 | /// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
129 | /// |
130 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi8) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_expandloadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
    _mm256_mask_expandloadu_epi8(_mm256_setzero_si256(), k, mem_addr)
}
138 | |
139 | /// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
140 | /// |
141 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi8) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_expandloadu_epi8(
    src: __m128i,
    k: __mmask16,
    mem_addr: *const i8,
) -> __m128i {
    transmute(expandloadb_128(mem_addr, src.as_i8x16(), k))
}
153 | |
154 | /// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
155 | /// |
156 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi8) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_expandloadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
    _mm_mask_expandloadu_epi8(_mm_setzero_si128(), k, mem_addr)
}
164 | |
165 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
166 | /// |
167 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi16) |
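///
/// # Examples
///
/// A minimal usage sketch (illustrative only; it assumes the running CPU actually
/// supports `avx512vbmi2`):
///
/// ```ignore
/// let a = _mm512_set1_epi16(9);
/// let mut out = [0i16; 32];
/// // Three mask bits are set, so exactly three elements of `a` are written
/// // contiguously to the start of `out`; the rest of `out` is left untouched.
/// unsafe { _mm512_mask_compressstoreu_epi16(out.as_mut_ptr() as *mut u8, 0b1_0011, a) };
/// ```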
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm512_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask32, a: __m512i) {
    vcompressstorew(base_addr as *mut _, a.as_i16x32(), k)
}
175 | |
176 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
177 | /// |
178 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi16) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm256_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask16, a: __m256i) {
    vcompressstorew256(base_addr as *mut _, a.as_i16x16(), k)
}
186 | |
187 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
188 | /// |
189 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi16) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask8, a: __m128i) {
    vcompressstorew128(base_addr as *mut _, a.as_i16x8(), k)
}
197 | |
198 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
199 | /// |
200 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi8) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm512_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask64, a: __m512i) {
    vcompressstoreb(base_addr as *mut _, a.as_i8x64(), k)
}
208 | |
209 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
210 | /// |
211 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi8) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm256_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask32, a: __m256i) {
    vcompressstoreb256(base_addr as *mut _, a.as_i8x32(), k)
}
219 | |
220 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
221 | /// |
222 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi8) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask16, a: __m128i) {
    vcompressstoreb128(base_addr as *mut _, a.as_i8x16(), k)
}
230 | |
231 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. |
232 | /// |
233 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi16&expand=1192) |
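///
/// # Examples
///
/// A minimal usage sketch (illustrative only; it assumes the running CPU actually
/// supports `avx512vbmi2`):
///
/// ```ignore
/// let a = _mm512_set1_epi16(9);
/// let src = _mm512_setzero_si512();
/// // The two active elements of `a` (lanes 1 and 2) are packed down into lanes 0
/// // and 1 of the result; every remaining lane keeps the value from `src`, i.e. zero.
/// let r = unsafe { _mm512_mask_compress_epi16(src, 0b110, a) };
/// ```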
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm512_mask_compress_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressw(a.as_i16x32(), src.as_i16x32(), k)) }
}
241 | |
242 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. |
243 | /// |
244 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi16&expand=1193) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm512_maskz_compress_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressw(a.as_i16x32(), i16x32::ZERO, k)) }
}
252 | |
253 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. |
254 | /// |
255 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi16&expand=1190) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm256_mask_compress_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressw256(a.as_i16x16(), src.as_i16x16(), k)) }
}
263 | |
264 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. |
265 | /// |
266 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi16&expand=1191) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm256_maskz_compress_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressw256(a.as_i16x16(), i16x16::ZERO, k)) }
}
274 | |
275 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. |
276 | /// |
277 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi16&expand=1188) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm_mask_compress_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressw128(a.as_i16x8(), src.as_i16x8(), k)) }
}
285 | |
286 | /// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. |
287 | /// |
288 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi16&expand=1189) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm_maskz_compress_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressw128(a.as_i16x8(), i16x8::ZERO, k)) }
}
296 | |
297 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. |
298 | /// |
299 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi8&expand=1210) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm512_mask_compress_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressb(a.as_i8x64(), src.as_i8x64(), k)) }
}
307 | |
308 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. |
309 | /// |
310 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi8&expand=1211) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm512_maskz_compress_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressb(a.as_i8x64(), i8x64::ZERO, k)) }
}
318 | |
319 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. |
320 | /// |
321 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi8&expand=1208) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm256_mask_compress_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressb256(a.as_i8x32(), src.as_i8x32(), k)) }
}
329 | |
330 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. |
331 | /// |
332 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi8&expand=1209) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm256_maskz_compress_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressb256(a.as_i8x32(), i8x32::ZERO, k)) }
}
340 | |
341 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. |
342 | /// |
343 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi8&expand=1206) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm_mask_compress_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressb128(a.as_i8x16(), src.as_i8x16(), k)) }
}
351 | |
352 | /// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. |
353 | /// |
354 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi8&expand=1207) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm_maskz_compress_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressb128(a.as_i8x16(), i8x16::ZERO, k)) }
}
362 | |
363 | /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
364 | /// |
365 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi16&expand=2310) |
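///
/// # Examples
///
/// A minimal usage sketch (illustrative only; it assumes the running CPU actually
/// supports `avx512vbmi2`):
///
/// ```ignore
/// let a = _mm512_set1_epi16(5);
/// let src = _mm512_setzero_si512();
/// // Mask bits 1 and 3 are set, so lanes 1 and 3 of the result receive the first
/// // two elements of `a` (its lanes 0 and 1); every other lane is taken from `src`.
/// let r = unsafe { _mm512_mask_expand_epi16(src, 0b1010, a) };
/// ```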
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm512_mask_expand_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandw(a.as_i16x32(), src.as_i16x32(), k)) }
}
373 | |
374 | /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
375 | /// |
376 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi16&expand=2311) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm512_maskz_expand_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandw(a.as_i16x32(), i16x32::ZERO, k)) }
}
384 | |
385 | /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
386 | /// |
387 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi16&expand=2308) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm256_mask_expand_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandw256(a.as_i16x16(), src.as_i16x16(), k)) }
}
395 | |
396 | /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
397 | /// |
398 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi16&expand=2309) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm256_maskz_expand_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandw256(a.as_i16x16(), i16x16::ZERO, k)) }
}
406 | |
407 | /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
408 | /// |
409 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi16&expand=2306) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm_mask_expand_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandw128(a.as_i16x8(), src.as_i16x8(), k)) }
}
417 | |
418 | /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
419 | /// |
420 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi16&expand=2307) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm_maskz_expand_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandw128(a.as_i16x8(), i16x8::ZERO, k)) }
}
428 | |
429 | /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
430 | /// |
431 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi8&expand=2328) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm512_mask_expand_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandb(a.as_i8x64(), src.as_i8x64(), k)) }
}
439 | |
440 | /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
441 | /// |
442 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi8&expand=2329) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm512_maskz_expand_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandb(a.as_i8x64(), i8x64::ZERO, k)) }
}
450 | |
451 | /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
452 | /// |
453 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi8&expand=2326) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm256_mask_expand_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandb256(a.as_i8x32(), src.as_i8x32(), k)) }
}
461 | |
462 | /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
463 | /// |
464 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi8&expand=2327) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm256_maskz_expand_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandb256(a.as_i8x32(), i8x32::ZERO, k)) }
}
472 | |
473 | /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
474 | /// |
475 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi8&expand=2324) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm_mask_expand_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandb128(a.as_i8x16(), src.as_i8x16(), k)) }
}
483 | |
484 | /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
485 | /// |
486 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi8&expand=2325) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandb128(a.as_i8x16(), i8x16::ZERO, k)) }
}
494 | |
495 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst. |
496 | /// |
497 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi64&expand=5087) |
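///
/// # Examples
///
/// A minimal usage sketch (illustrative only; it assumes the running CPU actually
/// supports `avx512vbmi2`):
///
/// ```ignore
/// let a = _mm512_set1_epi64(1);
/// let b = _mm512_setzero_si512();
/// let c = _mm512_set1_epi64(4);
/// // Each lane forms the 128-bit value (a:b), shifts it left by 4 and keeps the
/// // upper 64 bits, so every lane of `r` is 1 << 4 = 16.
/// let r = unsafe { _mm512_shldv_epi64(a, b, c) };
/// ```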
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe { transmute(vpshldvq(a.as_i64x8(), b.as_i64x8(), c.as_i64x8())) }
}
505 | |
506 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
507 | /// |
508 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi64&expand=5085) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm512_mask_shldv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf: i64x8 = _mm512_shldv_epi64(a, b, c).as_i64x8();
        transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
    }
}
519 | |
520 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
521 | /// |
522 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi64&expand=5086) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf: i64x8 = _mm512_shldv_epi64(a, b, c).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}
533 | |
534 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst. |
535 | /// |
536 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi64&expand=5084) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe { transmute(vpshldvq256(a.as_i64x4(), b.as_i64x4(), c.as_i64x4())) }
}
544 | |
545 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
546 | /// |
547 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi64&expand=5082) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm256_mask_shldv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf: i64x4 = _mm256_shldv_epi64(a, b, c).as_i64x4();
        transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
    }
}
558 | |
559 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
560 | /// |
561 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi64&expand=5083) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf: i64x4 = _mm256_shldv_epi64(a, b, c).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}
572 | |
573 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst. |
574 | /// |
575 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi64&expand=5081) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { transmute(vpshldvq128(a.as_i64x2(), b.as_i64x2(), c.as_i64x2())) }
}
583 | |
584 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
585 | /// |
586 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi64&expand=5079) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm_mask_shldv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf: i64x2 = _mm_shldv_epi64(a, b, c).as_i64x2();
        transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
    }
}
597 | |
598 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
599 | /// |
600 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi64&expand=5080) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf: i64x2 = _mm_shldv_epi64(a, b, c).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}
611 | |
612 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst. |
613 | /// |
614 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi32&expand=5078) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe { transmute(vpshldvd(a.as_i32x16(), b.as_i32x16(), c.as_i32x16())) }
}
622 | |
623 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
624 | /// |
625 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi32&expand=5076) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm512_mask_shldv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf: i32x16 = _mm512_shldv_epi32(a, b, c).as_i32x16();
        transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
    }
}
636 | |
637 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
638 | /// |
639 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi32&expand=5077) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm512_maskz_shldv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf: i32x16 = _mm512_shldv_epi32(a, b, c).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}
650 | |
651 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst. |
652 | /// |
653 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi32&expand=5075) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe { transmute(vpshldvd256(a.as_i32x8(), b.as_i32x8(), c.as_i32x8())) }
}
661 | |
662 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
663 | /// |
664 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi32&expand=5073) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm256_mask_shldv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf: i32x8 = _mm256_shldv_epi32(a, b, c).as_i32x8();
        transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
    }
}
675 | |
676 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
677 | /// |
678 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi32&expand=5074) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf: i32x8 = _mm256_shldv_epi32(a, b, c).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}
689 | |
690 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst. |
691 | /// |
692 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi32&expand=5072) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { transmute(vpshldvd128(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) }
}
700 | |
701 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
702 | /// |
703 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi32&expand=5070) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm_mask_shldv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf: i32x4 = _mm_shldv_epi32(a, b, c).as_i32x4();
        transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
    }
}
714 | |
715 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
716 | /// |
717 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi32&expand=5071) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf: i32x4 = _mm_shldv_epi32(a, b, c).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}
728 | |
729 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst. |
730 | /// |
731 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi16&expand=5069) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe { transmute(vpshldvw(a.as_i16x32(), b.as_i16x32(), c.as_i16x32())) }
}
739 | |
740 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
741 | /// |
742 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi16&expand=5067) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm512_mask_shldv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf: i16x32 = _mm512_shldv_epi16(a, b, c).as_i16x32();
        transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
    }
}
753 | |
754 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
755 | /// |
756 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi16&expand=5068) |
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm512_maskz_shldv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf: i16x32 = _mm512_shldv_epi16(a, b, c).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}
767 | |
768 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst. |
769 | /// |
770 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi16&expand=5066) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe { transmute(vpshldvw256(a.as_i16x16(), b.as_i16x16(), c.as_i16x16())) }
}
778 | |
779 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
780 | /// |
781 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi16&expand=5064) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm256_mask_shldv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf: i16x16 = _mm256_shldv_epi16(a, b, c).as_i16x16();
        transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
    }
}
792 | |
793 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
794 | /// |
795 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi16&expand=5065) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm256_maskz_shldv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf: i16x16 = _mm256_shldv_epi16(a, b, c).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}
806 | |
807 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst. |
808 | /// |
809 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi16&expand=5063) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { transmute(vpshldvw128(a.as_i16x8(), b.as_i16x8(), c.as_i16x8())) }
}
817 | |
818 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
819 | /// |
820 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi16&expand=5061) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm_mask_shldv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf: i16x8 = _mm_shldv_epi16(a, b, c).as_i16x8();
        transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
    }
}
831 | |
832 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
833 | /// |
834 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi16&expand=5062) |
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf: i16x8 = _mm_shldv_epi16(a, b, c).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}
845 | |
846 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst. |
847 | /// |
848 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi64&expand=5141) |
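///
/// # Examples
///
/// A minimal usage sketch (illustrative only; it assumes the running CPU actually
/// supports `avx512vbmi2`):
///
/// ```ignore
/// let a = _mm512_set1_epi64(16);
/// let b = _mm512_setzero_si512();
/// let c = _mm512_set1_epi64(4);
/// // Each lane forms the 128-bit value (b:a), shifts it right by 4 and keeps the
/// // lower 64 bits, so every lane of `r` is 16 >> 4 = 1.
/// let r = unsafe { _mm512_shrdv_epi64(a, b, c) };
/// ```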
849 | #[inline ] |
850 | #[target_feature (enable = "avx512vbmi2" )] |
851 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
852 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
853 | pub fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
unsafe { transmute(vpshrdvq(b.as_i64x8(), a.as_i64x8(), c.as_i64x8())) }
855 | } |
856 | |
857 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
858 | /// |
859 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi64&expand=5139) |
860 | #[inline ] |
861 | #[target_feature (enable = "avx512vbmi2" )] |
862 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
863 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
864 | pub fn _mm512_mask_shrdv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i { |
865 | unsafe { |
866 | let shf: i64x8 = _mm512_shrdv_epi64(a, b, c).as_i64x8(); |
transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
868 | } |
869 | } |
870 | |
871 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
872 | /// |
873 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi64&expand=5140) |
874 | #[inline ] |
875 | #[target_feature (enable = "avx512vbmi2" )] |
876 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
877 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
878 | pub fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
879 | unsafe { |
880 | let shf: i64x8 = _mm512_shrdv_epi64(a, b, c).as_i64x8(); |
transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
882 | } |
883 | } |
884 | |
885 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst. |
886 | /// |
887 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi64&expand=5138) |
888 | #[inline ] |
889 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
890 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
891 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
892 | pub fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
unsafe { transmute(vpshrdvq256(b.as_i64x4(), a.as_i64x4(), c.as_i64x4())) }
894 | } |
895 | |
896 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
897 | /// |
898 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi64&expand=5136) |
899 | #[inline ] |
900 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
901 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
902 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
903 | pub fn _mm256_mask_shrdv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { |
904 | unsafe { |
905 | let shf: i64x4 = _mm256_shrdv_epi64(a, b, c).as_i64x4(); |
transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
907 | } |
908 | } |
909 | |
910 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
911 | /// |
912 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi64&expand=5137) |
913 | #[inline ] |
914 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
915 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
916 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
917 | pub fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
918 | unsafe { |
919 | let shf: i64x4 = _mm256_shrdv_epi64(a, b, c).as_i64x4(); |
transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
921 | } |
922 | } |
923 | |
924 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst. |
925 | /// |
926 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi64&expand=5135) |
927 | #[inline ] |
928 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
929 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
930 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
931 | pub fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
unsafe { transmute(vpshrdvq128(b.as_i64x2(), a.as_i64x2(), c.as_i64x2())) }
933 | } |
934 | |
935 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
936 | /// |
937 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi64&expand=5133) |
938 | #[inline ] |
939 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
940 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
941 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
942 | pub fn _mm_mask_shrdv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { |
943 | unsafe { |
944 | let shf: i64x2 = _mm_shrdv_epi64(a, b, c).as_i64x2(); |
transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
946 | } |
947 | } |
948 | |
949 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
950 | /// |
951 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi64&expand=5134) |
952 | #[inline ] |
953 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
954 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
955 | #[cfg_attr (test, assert_instr(vpshrdvq))] |
956 | pub fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
957 | unsafe { |
958 | let shf: i64x2 = _mm_shrdv_epi64(a, b, c).as_i64x2(); |
transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
960 | } |
961 | } |
962 | |
963 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst. |
964 | /// |
965 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi32&expand=5132) |
966 | #[inline ] |
967 | #[target_feature (enable = "avx512vbmi2" )] |
968 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
969 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
970 | pub fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
unsafe { transmute(vpshrdvd(b.as_i32x16(), a.as_i32x16(), c.as_i32x16())) }
972 | } |
973 | |
974 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
975 | /// |
976 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi32&expand=5130) |
977 | #[inline ] |
978 | #[target_feature (enable = "avx512vbmi2" )] |
979 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
980 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
981 | pub fn _mm512_mask_shrdv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i { |
982 | unsafe { |
983 | let shf: i32x16 = _mm512_shrdv_epi32(a, b, c).as_i32x16(); |
transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
985 | } |
986 | } |
987 | |
988 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
989 | /// |
990 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi32&expand=5131) |
991 | #[inline ] |
992 | #[target_feature (enable = "avx512vbmi2" )] |
993 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
994 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
995 | pub fn _mm512_maskz_shrdv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
996 | unsafe { |
997 | let shf: i32x16 = _mm512_shrdv_epi32(a, b, c).as_i32x16(); |
transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
999 | } |
1000 | } |
1001 | |
1002 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst. |
1003 | /// |
1004 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi32&expand=5129) |
1005 | #[inline ] |
1006 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1007 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1008 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
1009 | pub fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
unsafe { transmute(vpshrdvd256(b.as_i32x8(), a.as_i32x8(), c.as_i32x8())) }
1011 | } |
1012 | |
1013 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
1014 | /// |
1015 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi32&expand=5127) |
1016 | #[inline ] |
1017 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1018 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1019 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
1020 | pub fn _mm256_mask_shrdv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { |
1021 | unsafe { |
1022 | let shf: i32x8 = _mm256_shrdv_epi32(a, b, c).as_i32x8(); |
transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
1024 | } |
1025 | } |
1026 | |
1027 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1028 | /// |
1029 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi32&expand=5128) |
1030 | #[inline ] |
1031 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1032 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1033 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
1034 | pub fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
1035 | unsafe { |
1036 | let shf: i32x8 = _mm256_shrdv_epi32(a, b, c).as_i32x8(); |
transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
1038 | } |
1039 | } |
1040 | |
1041 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst. |
1042 | /// |
1043 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi32&expand=5126) |
1044 | #[inline ] |
1045 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1046 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1047 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
1048 | pub fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
unsafe { transmute(vpshrdvd128(b.as_i32x4(), a.as_i32x4(), c.as_i32x4())) }
1050 | } |
1051 | |
1052 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
1053 | /// |
1054 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi32&expand=5124) |
1055 | #[inline ] |
1056 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1057 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1058 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
1059 | pub fn _mm_mask_shrdv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { |
1060 | unsafe { |
1061 | let shf: i32x4 = _mm_shrdv_epi32(a, b, c).as_i32x4(); |
transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
1063 | } |
1064 | } |
1065 | |
1066 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1067 | /// |
1068 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi32&expand=5125) |
1069 | #[inline ] |
1070 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1071 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1072 | #[cfg_attr (test, assert_instr(vpshrdvd))] |
1073 | pub fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
1074 | unsafe { |
1075 | let shf: i32x4 = _mm_shrdv_epi32(a, b, c).as_i32x4(); |
transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
1077 | } |
1078 | } |
1079 | |
1080 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst. |
1081 | /// |
1082 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi16&expand=5123) |
1083 | #[inline ] |
1084 | #[target_feature (enable = "avx512vbmi2" )] |
1085 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1086 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
1087 | pub fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
unsafe { transmute(vpshrdvw(b.as_i16x32(), a.as_i16x32(), c.as_i16x32())) }
1089 | } |
1090 | |
1091 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
1092 | /// |
1093 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi16&expand=5121) |
1094 | #[inline ] |
1095 | #[target_feature (enable = "avx512vbmi2" )] |
1096 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1097 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
1098 | pub fn _mm512_mask_shrdv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i { |
1099 | unsafe { |
1100 | let shf: i16x32 = _mm512_shrdv_epi16(a, b, c).as_i16x32(); |
transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
1102 | } |
1103 | } |
1104 | |
1105 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1106 | /// |
1107 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi16&expand=5122) |
1108 | #[inline ] |
1109 | #[target_feature (enable = "avx512vbmi2" )] |
1110 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1111 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
1112 | pub fn _mm512_maskz_shrdv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
1113 | unsafe { |
1114 | let shf: i16x32 = _mm512_shrdv_epi16(a, b, c).as_i16x32(); |
transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
1116 | } |
1117 | } |
1118 | |
1119 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst. |
1120 | /// |
1121 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi16&expand=5120) |
1122 | #[inline ] |
1123 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1124 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1125 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
1126 | pub fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
unsafe { transmute(vpshrdvw256(b.as_i16x16(), a.as_i16x16(), c.as_i16x16())) }
1128 | } |
1129 | |
1130 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
1131 | /// |
1132 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi16&expand=5118) |
1133 | #[inline ] |
1134 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1135 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1136 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
1137 | pub fn _mm256_mask_shrdv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i { |
1138 | unsafe { |
1139 | let shf: i16x16 = _mm256_shrdv_epi16(a, b, c).as_i16x16(); |
transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
1141 | } |
1142 | } |
1143 | |
1144 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1145 | /// |
1146 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi16&expand=5119) |
1147 | #[inline ] |
1148 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1149 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1150 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
1151 | pub fn _mm256_maskz_shrdv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
1152 | unsafe { |
1153 | let shf: i16x16 = _mm256_shrdv_epi16(a, b, c).as_i16x16(); |
transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
1155 | } |
1156 | } |
1157 | |
1158 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst. |
1159 | /// |
1160 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi16&expand=5117) |
1161 | #[inline ] |
1162 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1163 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1164 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
1165 | pub fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
unsafe { transmute(vpshrdvw128(b.as_i16x8(), a.as_i16x8(), c.as_i16x8())) }
1167 | } |
1168 | |
1169 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
1170 | /// |
1171 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi16&expand=5115) |
1172 | #[inline ] |
1173 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1174 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1175 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
1176 | pub fn _mm_mask_shrdv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { |
1177 | unsafe { |
1178 | let shf: i16x8 = _mm_shrdv_epi16(a, b, c).as_i16x8(); |
transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
1180 | } |
1181 | } |
1182 | |
1183 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1184 | /// |
1185 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi16&expand=5116) |
1186 | #[inline ] |
1187 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1188 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1189 | #[cfg_attr (test, assert_instr(vpshrdvw))] |
1190 | pub fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
1191 | unsafe { |
1192 | let shf: i16x8 = _mm_shrdv_epi16(a, b, c).as_i16x8(); |
transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
1194 | } |
1195 | } |
1196 | |
/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
1198 | /// |
1199 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi64&expand=5060) |
1200 | #[inline ] |
1201 | #[target_feature (enable = "avx512vbmi2" )] |
1202 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1203 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
1204 | #[rustc_legacy_const_generics (2)] |
1205 | pub fn _mm512_shldi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { |
1206 | static_assert_uimm_bits!(IMM8, 8); |
_mm512_shldv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64))
1208 | } |
1209 | |
1210 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1211 | /// |
1212 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi64&expand=5058) |
1213 | #[inline ] |
1214 | #[target_feature (enable = "avx512vbmi2" )] |
1215 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1216 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
1217 | #[rustc_legacy_const_generics (4)] |
1218 | pub fn _mm512_mask_shldi_epi64<const IMM8: i32>( |
1219 | src: __m512i, |
1220 | k: __mmask8, |
1221 | a: __m512i, |
1222 | b: __m512i, |
1223 | ) -> __m512i { |
1224 | unsafe { |
1225 | static_assert_uimm_bits!(IMM8, 8); |
1226 | let shf: i64x8 = _mm512_shldi_epi64::<IMM8>(a, b).as_i64x8(); |
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
1228 | } |
1229 | } |
1230 | |
1231 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1232 | /// |
1233 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi64&expand=5059) |
1234 | #[inline ] |
1235 | #[target_feature (enable = "avx512vbmi2" )] |
1236 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1237 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
1238 | #[rustc_legacy_const_generics (3)] |
1239 | pub fn _mm512_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { |
1240 | unsafe { |
1241 | static_assert_uimm_bits!(IMM8, 8); |
1242 | let shf: i64x8 = _mm512_shldi_epi64::<IMM8>(a, b).as_i64x8(); |
transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
1244 | } |
1245 | } |
1246 | |
/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
1248 | /// |
1249 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi64&expand=5057) |
1250 | #[inline ] |
1251 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1252 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1253 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
1254 | #[rustc_legacy_const_generics (2)] |
1255 | pub fn _mm256_shldi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
1256 | static_assert_uimm_bits!(IMM8, 8); |
_mm256_shldv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64))
1258 | } |
1259 | |
1260 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1261 | /// |
1262 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi64&expand=5055) |
1263 | #[inline ] |
1264 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1265 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1266 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
1267 | #[rustc_legacy_const_generics (4)] |
1268 | pub fn _mm256_mask_shldi_epi64<const IMM8: i32>( |
1269 | src: __m256i, |
1270 | k: __mmask8, |
1271 | a: __m256i, |
1272 | b: __m256i, |
1273 | ) -> __m256i { |
1274 | unsafe { |
1275 | static_assert_uimm_bits!(IMM8, 8); |
1276 | let shf: i64x4 = _mm256_shldi_epi64::<IMM8>(a, b).as_i64x4(); |
transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
1278 | } |
1279 | } |
1280 | |
1281 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1282 | /// |
1283 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi64&expand=5056) |
1284 | #[inline ] |
1285 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1286 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1287 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
1288 | #[rustc_legacy_const_generics (3)] |
1289 | pub fn _mm256_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { |
1290 | unsafe { |
1291 | static_assert_uimm_bits!(IMM8, 8); |
1292 | let shf: i64x4 = _mm256_shldi_epi64::<IMM8>(a, b).as_i64x4(); |
transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
1294 | } |
1295 | } |
1296 | |
/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
1298 | /// |
1299 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi64&expand=5054) |
1300 | #[inline ] |
1301 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1302 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1303 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
1304 | #[rustc_legacy_const_generics (2)] |
1305 | pub fn _mm_shldi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
1306 | static_assert_uimm_bits!(IMM8, 8); |
_mm_shldv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64))
1308 | } |
1309 | |
1310 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1311 | /// |
1312 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi64&expand=5052) |
1313 | #[inline ] |
1314 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1315 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1316 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
1317 | #[rustc_legacy_const_generics (4)] |
1318 | pub fn _mm_mask_shldi_epi64<const IMM8: i32>( |
1319 | src: __m128i, |
1320 | k: __mmask8, |
1321 | a: __m128i, |
1322 | b: __m128i, |
1323 | ) -> __m128i { |
1324 | unsafe { |
1325 | static_assert_uimm_bits!(IMM8, 8); |
1326 | let shf: i64x2 = _mm_shldi_epi64::<IMM8>(a, b).as_i64x2(); |
transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
1328 | } |
1329 | } |
1330 | |
1331 | /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1332 | /// |
1333 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi64&expand=5053) |
1334 | #[inline ] |
1335 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1336 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1337 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] |
1338 | #[rustc_legacy_const_generics (3)] |
1339 | pub fn _mm_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
1340 | unsafe { |
1341 | static_assert_uimm_bits!(IMM8, 8); |
1342 | let shf: i64x2 = _mm_shldi_epi64::<IMM8>(a, b).as_i64x2(); |
transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
1344 | } |
1345 | } |
1346 | |
1347 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst. |
1348 | /// |
1349 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi32&expand=5051) |
1350 | #[inline ] |
1351 | #[target_feature (enable = "avx512vbmi2" )] |
1352 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1353 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
1354 | #[rustc_legacy_const_generics (2)] |
1355 | pub fn _mm512_shldi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { |
1356 | static_assert_uimm_bits!(IMM8, 8); |
_mm512_shldv_epi32(a, b, _mm512_set1_epi32(IMM8))
1358 | } |
1359 | |
1360 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1361 | /// |
1362 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi32&expand=5049) |
1363 | #[inline ] |
1364 | #[target_feature (enable = "avx512vbmi2" )] |
1365 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1366 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
1367 | #[rustc_legacy_const_generics (4)] |
1368 | pub fn _mm512_mask_shldi_epi32<const IMM8: i32>( |
1369 | src: __m512i, |
1370 | k: __mmask16, |
1371 | a: __m512i, |
1372 | b: __m512i, |
1373 | ) -> __m512i { |
1374 | unsafe { |
1375 | static_assert_uimm_bits!(IMM8, 8); |
1376 | let shf: i32x16 = _mm512_shldi_epi32::<IMM8>(a, b).as_i32x16(); |
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
1378 | } |
1379 | } |
1380 | |
1381 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1382 | /// |
1383 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi32&expand=5050) |
1384 | #[inline ] |
1385 | #[target_feature (enable = "avx512vbmi2" )] |
1386 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1387 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
1388 | #[rustc_legacy_const_generics (3)] |
1389 | pub fn _mm512_maskz_shldi_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { |
1390 | unsafe { |
1391 | static_assert_uimm_bits!(IMM8, 8); |
1392 | let shf: i32x16 = _mm512_shldi_epi32::<IMM8>(a, b).as_i32x16(); |
transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
1394 | } |
1395 | } |
1396 | |
1397 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst. |
1398 | /// |
1399 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi32&expand=5048) |
1400 | #[inline ] |
1401 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1402 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1403 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
1404 | #[rustc_legacy_const_generics (2)] |
1405 | pub fn _mm256_shldi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
1406 | static_assert_uimm_bits!(IMM8, 8); |
_mm256_shldv_epi32(a, b, _mm256_set1_epi32(IMM8))
1408 | } |
1409 | |
1410 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1411 | /// |
1412 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi32&expand=5046) |
1413 | #[inline ] |
1414 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1415 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1416 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
1417 | #[rustc_legacy_const_generics (4)] |
1418 | pub fn _mm256_mask_shldi_epi32<const IMM8: i32>( |
1419 | src: __m256i, |
1420 | k: __mmask8, |
1421 | a: __m256i, |
1422 | b: __m256i, |
1423 | ) -> __m256i { |
1424 | unsafe { |
1425 | static_assert_uimm_bits!(IMM8, 8); |
1426 | let shf: i32x8 = _mm256_shldi_epi32::<IMM8>(a, b).as_i32x8(); |
transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
1428 | } |
1429 | } |
1430 | |
1431 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1432 | /// |
1433 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi32&expand=5047) |
1434 | #[inline ] |
1435 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1436 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1437 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
1438 | #[rustc_legacy_const_generics (3)] |
1439 | pub fn _mm256_maskz_shldi_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { |
1440 | unsafe { |
1441 | static_assert_uimm_bits!(IMM8, 8); |
1442 | let shf: i32x8 = _mm256_shldi_epi32::<IMM8>(a, b).as_i32x8(); |
transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
1444 | } |
1445 | } |
1446 | |
1447 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst. |
1448 | /// |
1449 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi32&expand=5045) |
1450 | #[inline ] |
1451 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1452 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1453 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
1454 | #[rustc_legacy_const_generics (2)] |
1455 | pub fn _mm_shldi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
1456 | static_assert_uimm_bits!(IMM8, 8); |
_mm_shldv_epi32(a, b, _mm_set1_epi32(IMM8))
1458 | } |
1459 | |
1460 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1461 | /// |
1462 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi32&expand=5043) |
1463 | #[inline ] |
1464 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1465 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1466 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
1467 | #[rustc_legacy_const_generics (4)] |
1468 | pub fn _mm_mask_shldi_epi32<const IMM8: i32>( |
1469 | src: __m128i, |
1470 | k: __mmask8, |
1471 | a: __m128i, |
1472 | b: __m128i, |
1473 | ) -> __m128i { |
1474 | unsafe { |
1475 | static_assert_uimm_bits!(IMM8, 8); |
1476 | let shf: i32x4 = _mm_shldi_epi32::<IMM8>(a, b).as_i32x4(); |
transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
1478 | } |
1479 | } |
1480 | |
1481 | /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1482 | /// |
1483 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi32&expand=5044) |
1484 | #[inline ] |
1485 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1486 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1487 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] |
1488 | #[rustc_legacy_const_generics (3)] |
1489 | pub fn _mm_maskz_shldi_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
1490 | unsafe { |
1491 | static_assert_uimm_bits!(IMM8, 8); |
1492 | let shf: i32x4 = _mm_shldi_epi32::<IMM8>(a, b).as_i32x4(); |
transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
1494 | } |
1495 | } |
1496 | |
/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1498 | /// |
1499 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi16&expand=5042) |
1500 | #[inline ] |
1501 | #[target_feature (enable = "avx512vbmi2" )] |
1502 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1503 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
1504 | #[rustc_legacy_const_generics (2)] |
1505 | pub fn _mm512_shldi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { |
1506 | static_assert_uimm_bits!(IMM8, 8); |
_mm512_shldv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16))
1508 | } |
1509 | |
1510 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1511 | /// |
1512 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi16&expand=5040) |
1513 | #[inline ] |
1514 | #[target_feature (enable = "avx512vbmi2" )] |
1515 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1516 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
1517 | #[rustc_legacy_const_generics (4)] |
1518 | pub fn _mm512_mask_shldi_epi16<const IMM8: i32>( |
1519 | src: __m512i, |
1520 | k: __mmask32, |
1521 | a: __m512i, |
1522 | b: __m512i, |
1523 | ) -> __m512i { |
1524 | unsafe { |
1525 | static_assert_uimm_bits!(IMM8, 8); |
1526 | let shf: i16x32 = _mm512_shldi_epi16::<IMM8>(a, b).as_i16x32(); |
transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
1528 | } |
1529 | } |
1530 | |
1531 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1532 | /// |
1533 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi16&expand=5041) |
1534 | #[inline ] |
1535 | #[target_feature (enable = "avx512vbmi2" )] |
1536 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1537 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
1538 | #[rustc_legacy_const_generics (3)] |
1539 | pub fn _mm512_maskz_shldi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
1540 | unsafe { |
1541 | static_assert_uimm_bits!(IMM8, 8); |
1542 | let shf: i16x32 = _mm512_shldi_epi16::<IMM8>(a, b).as_i16x32(); |
transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
1544 | } |
1545 | } |
1546 | |
/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1548 | /// |
1549 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi16&expand=5039) |
1550 | #[inline ] |
1551 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1552 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1553 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
1554 | #[rustc_legacy_const_generics (2)] |
1555 | pub fn _mm256_shldi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
1556 | static_assert_uimm_bits!(IMM8, 8); |
_mm256_shldv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16))
1558 | } |
1559 | |
1560 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1561 | /// |
1562 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi16&expand=5037) |
1563 | #[inline ] |
1564 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1565 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1566 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
1567 | #[rustc_legacy_const_generics (4)] |
1568 | pub fn _mm256_mask_shldi_epi16<const IMM8: i32>( |
1569 | src: __m256i, |
1570 | k: __mmask16, |
1571 | a: __m256i, |
1572 | b: __m256i, |
1573 | ) -> __m256i { |
1574 | unsafe { |
1575 | static_assert_uimm_bits!(IMM8, 8); |
1576 | let shf: i16x16 = _mm256_shldi_epi16::<IMM8>(a, b).as_i16x16(); |
transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
1578 | } |
1579 | } |
1580 | |
1581 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1582 | /// |
1583 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi16&expand=5038) |
1584 | #[inline ] |
1585 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1586 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1587 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
1588 | #[rustc_legacy_const_generics (3)] |
1589 | pub fn _mm256_maskz_shldi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
1590 | unsafe { |
1591 | static_assert_uimm_bits!(IMM8, 8); |
1592 | let shf: i16x16 = _mm256_shldi_epi16::<IMM8>(a, b).as_i16x16(); |
transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
1594 | } |
1595 | } |
1596 | |
/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1598 | /// |
1599 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi16&expand=5036) |
1600 | #[inline ] |
1601 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1602 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1603 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
1604 | #[rustc_legacy_const_generics (2)] |
1605 | pub fn _mm_shldi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
1606 | static_assert_uimm_bits!(IMM8, 8); |
_mm_shldv_epi16(a, b, _mm_set1_epi16(IMM8 as i16))
1608 | } |
1609 | |
1610 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1611 | /// |
1612 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi16&expand=5034) |
1613 | #[inline ] |
1614 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1615 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1616 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
1617 | #[rustc_legacy_const_generics (4)] |
1618 | pub fn _mm_mask_shldi_epi16<const IMM8: i32>( |
1619 | src: __m128i, |
1620 | k: __mmask8, |
1621 | a: __m128i, |
1622 | b: __m128i, |
1623 | ) -> __m128i { |
1624 | unsafe { |
1625 | static_assert_uimm_bits!(IMM8, 8); |
1626 | let shf: i16x8 = _mm_shldi_epi16::<IMM8>(a, b).as_i16x8(); |
transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
1628 | } |
1629 | } |
1630 | |
1631 | /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1632 | /// |
1633 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi16&expand=5035) |
1634 | #[inline ] |
1635 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1636 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1637 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] |
1638 | #[rustc_legacy_const_generics (3)] |
1639 | pub fn _mm_maskz_shldi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
1640 | unsafe { |
1641 | static_assert_uimm_bits!(IMM8, 8); |
1642 | let shf: i16x8 = _mm_shldi_epi16::<IMM8>(a, b).as_i16x8(); |
transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
1644 | } |
1645 | } |
1646 | |
1647 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst. |
1648 | /// |
1649 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi64&expand=5114) |
1650 | #[inline ] |
1651 | #[target_feature (enable = "avx512vbmi2" )] |
1652 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1653 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
1654 | #[rustc_legacy_const_generics (2)] |
1655 | pub fn _mm512_shrdi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { |
1656 | static_assert_uimm_bits!(IMM8, 8); |
_mm512_shrdv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64))
1658 | } |
1659 | |
/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1661 | /// |
1662 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi64&expand=5112) |
1663 | #[inline ] |
1664 | #[target_feature (enable = "avx512vbmi2" )] |
1665 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1666 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
1667 | #[rustc_legacy_const_generics (4)] |
1668 | pub fn _mm512_mask_shrdi_epi64<const IMM8: i32>( |
1669 | src: __m512i, |
1670 | k: __mmask8, |
1671 | a: __m512i, |
1672 | b: __m512i, |
1673 | ) -> __m512i { |
1674 | unsafe { |
1675 | static_assert_uimm_bits!(IMM8, 8); |
1676 | let shf: i64x8 = _mm512_shrdi_epi64::<IMM8>(a, b).as_i64x8(); |
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
1678 | } |
1679 | } |
1680 | |
1681 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1682 | /// |
1683 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi64&expand=5113) |
1684 | #[inline ] |
1685 | #[target_feature (enable = "avx512vbmi2" )] |
1686 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1687 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 255))] //should be vpshrdq |
1688 | #[rustc_legacy_const_generics (3)] |
1689 | pub fn _mm512_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { |
1690 | unsafe { |
1691 | static_assert_uimm_bits!(IMM8, 8); |
1692 | let shf: i64x8 = _mm512_shrdi_epi64::<IMM8>(a, b).as_i64x8(); |
transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
1694 | } |
1695 | } |
1696 | |
1697 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst. |
1698 | /// |
1699 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi64&expand=5111) |
1700 | #[inline ] |
1701 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1702 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1703 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
1704 | #[rustc_legacy_const_generics (2)] |
1705 | pub fn _mm256_shrdi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
1706 | static_assert_uimm_bits!(IMM8, 8); |
_mm256_shrdv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64))
1708 | } |
1709 | |
/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1711 | /// |
1712 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi64&expand=5109) |
1713 | #[inline ] |
1714 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1715 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1716 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
1717 | #[rustc_legacy_const_generics (4)] |
1718 | pub fn _mm256_mask_shrdi_epi64<const IMM8: i32>( |
1719 | src: __m256i, |
1720 | k: __mmask8, |
1721 | a: __m256i, |
1722 | b: __m256i, |
1723 | ) -> __m256i { |
1724 | unsafe { |
1725 | static_assert_uimm_bits!(IMM8, 8); |
1726 | let shf: i64x4 = _mm256_shrdi_epi64::<IMM8>(a, b).as_i64x4(); |
transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
1728 | } |
1729 | } |
1730 | |
1731 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1732 | /// |
1733 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi64&expand=5110) |
1734 | #[inline ] |
1735 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1736 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1737 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
1738 | #[rustc_legacy_const_generics (3)] |
1739 | pub fn _mm256_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { |
1740 | unsafe { |
1741 | static_assert_uimm_bits!(IMM8, 8); |
1742 | let shf: i64x4 = _mm256_shrdi_epi64::<IMM8>(a, b).as_i64x4(); |
transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
1744 | } |
1745 | } |
1746 | |
1747 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst. |
1748 | /// |
1749 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi64&expand=5108) |
1750 | #[inline ] |
1751 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1752 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1753 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
1754 | #[rustc_legacy_const_generics (2)] |
1755 | pub fn _mm_shrdi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
1756 | static_assert_uimm_bits!(IMM8, 8); |
_mm_shrdv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64))
1758 | } |
1759 | |
/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1761 | /// |
1762 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi64&expand=5106) |
1763 | #[inline ] |
1764 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1765 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1766 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
1767 | #[rustc_legacy_const_generics (4)] |
1768 | pub fn _mm_mask_shrdi_epi64<const IMM8: i32>( |
1769 | src: __m128i, |
1770 | k: __mmask8, |
1771 | a: __m128i, |
1772 | b: __m128i, |
1773 | ) -> __m128i { |
1774 | unsafe { |
1775 | static_assert_uimm_bits!(IMM8, 8); |
1776 | let shf: i64x2 = _mm_shrdi_epi64::<IMM8>(a, b).as_i64x2(); |
1777 | transmute(simd_select_bitmask(k, shf, src.as_i64x2())) |
1778 | } |
1779 | } |
1780 | |
1781 | /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1782 | /// |
1783 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi64&expand=5107) |
1784 | #[inline ] |
1785 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1786 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1787 | #[cfg_attr (test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq |
1788 | #[rustc_legacy_const_generics (3)] |
1789 | pub fn _mm_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
1790 | unsafe { |
1791 | static_assert_uimm_bits!(IMM8, 8); |
1792 | let shf: i64x2 = _mm_shrdi_epi64::<IMM8>(a, b).as_i64x2(); |
1793 | transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) |
1794 | } |
1795 | } |
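
// A minimal scalar model of the 64-bit double shift right implemented above (an
// illustrative sketch for readers; the helper name is invented here and is not
// part of the stdarch API): concatenate b:a into a 128-bit value, shift it right
// by the count taken modulo 64, and keep the low 64 bits.
#[cfg(test)]
#[allow(dead_code)]
fn shrd64_scalar_model(a: u64, b: u64, imm8: u32) -> u64 {
    // b forms the upper 64 bits of the concatenation, a the lower 64 bits.
    let concat = ((b as u128) << 64) | (a as u128);
    (concat >> (imm8 & 63)) as u64
}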
1796 | |
1797 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst. |
1798 | /// |
1799 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi32&expand=5105) |
1800 | #[inline ] |
1801 | #[target_feature (enable = "avx512vbmi2" )] |
1802 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1803 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd |
1804 | #[rustc_legacy_const_generics (2)] |
1805 | pub fn _mm512_shrdi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { |
1806 | static_assert_uimm_bits!(IMM8, 8); |
1807 | _mm512_shrdv_epi32(a, b, _mm512_set1_epi32(IMM8)) |
1808 | } |
1809 | |
1810 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1811 | /// |
1812 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi32&expand=5103) |
1813 | #[inline ] |
1814 | #[target_feature (enable = "avx512vbmi2" )] |
1815 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1816 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd |
1817 | #[rustc_legacy_const_generics (4)] |
1818 | pub fn _mm512_mask_shrdi_epi32<const IMM8: i32>( |
1819 | src: __m512i, |
1820 | k: __mmask16, |
1821 | a: __m512i, |
1822 | b: __m512i, |
1823 | ) -> __m512i { |
1824 | unsafe { |
1825 | static_assert_uimm_bits!(IMM8, 8); |
1826 | let shf: i32x16 = _mm512_shrdi_epi32::<IMM8>(a, b).as_i32x16(); |
1827 | transmute(simd_select_bitmask(k, shf, src.as_i32x16())) |
1828 | } |
1829 | } |
1830 | |
1831 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1832 | /// |
1833 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi32&expand=5104) |
1834 | #[inline ] |
1835 | #[target_feature (enable = "avx512vbmi2" )] |
1836 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1837 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd |
1838 | #[rustc_legacy_const_generics (3)] |
1839 | pub fn _mm512_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { |
1840 | unsafe { |
1841 | static_assert_uimm_bits!(IMM8, 8); |
1842 | let shf: i32x16 = _mm512_shrdi_epi32::<IMM8>(a, b).as_i32x16(); |
1843 | transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) |
1844 | } |
1845 | } |
1846 | |
1847 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst. |
1848 | /// |
1849 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi32&expand=5102) |
1850 | #[inline ] |
1851 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1852 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1853 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd |
1854 | #[rustc_legacy_const_generics (2)] |
1855 | pub fn _mm256_shrdi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
1856 | static_assert_uimm_bits!(IMM8, 8); |
1857 | _mm256_shrdv_epi32(a, b, _mm256_set1_epi32(IMM8)) |
1858 | } |
1859 | |
1860 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1861 | /// |
1862 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi32&expand=5100) |
1863 | #[inline ] |
1864 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1865 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1866 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd |
1867 | #[rustc_legacy_const_generics (4)] |
1868 | pub fn _mm256_mask_shrdi_epi32<const IMM8: i32>( |
1869 | src: __m256i, |
1870 | k: __mmask8, |
1871 | a: __m256i, |
1872 | b: __m256i, |
1873 | ) -> __m256i { |
1874 | unsafe { |
1875 | static_assert_uimm_bits!(IMM8, 8); |
1876 | let shf: i32x8 = _mm256_shrdi_epi32::<IMM8>(a, b).as_i32x8(); |
1877 | transmute(simd_select_bitmask(k, shf, src.as_i32x8())) |
1878 | } |
1879 | } |
1880 | |
1881 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1882 | /// |
1883 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi32&expand=5101) |
1884 | #[inline ] |
1885 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1886 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1887 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd |
1888 | #[rustc_legacy_const_generics (3)] |
1889 | pub fn _mm256_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { |
1890 | unsafe { |
1891 | static_assert_uimm_bits!(IMM8, 8); |
1892 | let shf: i32x8 = _mm256_shrdi_epi32::<IMM8>(a, b).as_i32x8(); |
1893 | transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) |
1894 | } |
1895 | } |
1896 | |
1897 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst. |
1898 | /// |
1899 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi32&expand=5099) |
1900 | #[inline ] |
1901 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1902 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1903 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd |
1904 | #[rustc_legacy_const_generics (2)] |
1905 | pub fn _mm_shrdi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
1906 | static_assert_uimm_bits!(IMM8, 8); |
1907 | _mm_shrdv_epi32(a, b, _mm_set1_epi32(IMM8)) |
1908 | } |
1909 | |
1910 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1911 | /// |
1912 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi32&expand=5097) |
1913 | #[inline ] |
1914 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1915 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1916 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd |
1917 | #[rustc_legacy_const_generics (4)] |
1918 | pub fn _mm_mask_shrdi_epi32<const IMM8: i32>( |
1919 | src: __m128i, |
1920 | k: __mmask8, |
1921 | a: __m128i, |
1922 | b: __m128i, |
1923 | ) -> __m128i { |
1924 | unsafe { |
1925 | static_assert_uimm_bits!(IMM8, 8); |
1926 | let shf: i32x4 = _mm_shrdi_epi32::<IMM8>(a, b).as_i32x4(); |
1927 | transmute(simd_select_bitmask(k, shf, src.as_i32x4())) |
1928 | } |
1929 | } |
1930 | |
1931 | /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1932 | /// |
1933 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi32&expand=5098) |
1934 | #[inline ] |
1935 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
1936 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1937 | #[cfg_attr (test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd |
1938 | #[rustc_legacy_const_generics (3)] |
1939 | pub fn _mm_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
1940 | unsafe { |
1941 | static_assert_uimm_bits!(IMM8, 8); |
1942 | let shf: i32x4 = _mm_shrdi_epi32::<IMM8>(a, b).as_i32x4(); |
1943 | transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) |
1944 | } |
1945 | } |
1946 | |
1947 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst. |
1948 | /// |
1949 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi16&expand=5096) |
1950 | #[inline ] |
1951 | #[target_feature (enable = "avx512vbmi2" )] |
1952 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1953 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
1954 | #[rustc_legacy_const_generics (2)] |
1955 | pub fn _mm512_shrdi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { |
1956 | static_assert_uimm_bits!(IMM8, 8); |
1957 | _mm512_shrdv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16)) |
1958 | } |
1959 | |
1960 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1961 | /// |
1962 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi16&expand=5094) |
1963 | #[inline ] |
1964 | #[target_feature (enable = "avx512vbmi2" )] |
1965 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1966 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
1967 | #[rustc_legacy_const_generics (4)] |
1968 | pub fn _mm512_mask_shrdi_epi16<const IMM8: i32>( |
1969 | src: __m512i, |
1970 | k: __mmask32, |
1971 | a: __m512i, |
1972 | b: __m512i, |
1973 | ) -> __m512i { |
1974 | unsafe { |
1975 | static_assert_uimm_bits!(IMM8, 8); |
1976 | let shf: i16x32 = _mm512_shrdi_epi16::<IMM8>(a, b).as_i16x32(); |
1977 | transmute(simd_select_bitmask(k, shf, src.as_i16x32())) |
1978 | } |
1979 | } |
1980 | |
1981 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1982 | /// |
1983 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi16&expand=5095) |
1984 | #[inline ] |
1985 | #[target_feature (enable = "avx512vbmi2" )] |
1986 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1987 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
1988 | #[rustc_legacy_const_generics (3)] |
1989 | pub fn _mm512_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
1990 | unsafe { |
1991 | static_assert_uimm_bits!(IMM8, 8); |
1992 | let shf: i16x32 = _mm512_shrdi_epi16::<IMM8>(a, b).as_i16x32(); |
1993 | transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) |
1994 | } |
1995 | } |
1996 | |
1997 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst. |
1998 | /// |
1999 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi16&expand=5093) |
2000 | #[inline ] |
2001 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2002 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2003 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
2004 | #[rustc_legacy_const_generics (2)] |
2005 | pub fn _mm256_shrdi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
2006 | static_assert_uimm_bits!(IMM8, 8); |
2007 | _mm256_shrdv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16)) |
2008 | } |
2009 | |
2010 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
2011 | /// |
2012 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi16&expand=5091) |
2013 | #[inline ] |
2014 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2015 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2016 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
2017 | #[rustc_legacy_const_generics (4)] |
2018 | pub fn _mm256_mask_shrdi_epi16<const IMM8: i32>( |
2019 | src: __m256i, |
2020 | k: __mmask16, |
2021 | a: __m256i, |
2022 | b: __m256i, |
2023 | ) -> __m256i { |
2024 | unsafe { |
2025 | static_assert_uimm_bits!(IMM8, 8); |
2026 | let shf: i16x16 = _mm256_shrdi_epi16::<IMM8>(a, b).as_i16x16(); |
2027 | transmute(simd_select_bitmask(k, shf, src.as_i16x16())) |
2028 | } |
2029 | } |
2030 | |
2031 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2032 | /// |
2033 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi16&expand=5092) |
2034 | #[inline ] |
2035 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2036 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2037 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
2038 | #[rustc_legacy_const_generics (3)] |
2039 | pub fn _mm256_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
2040 | unsafe { |
2041 | static_assert_uimm_bits!(IMM8, 8); |
2042 | let shf: i16x16 = _mm256_shrdi_epi16::<IMM8>(a, b).as_i16x16(); |
2043 | transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) |
2044 | } |
2045 | } |
2046 | |
2047 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst. |
2048 | /// |
2049 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi16&expand=5090) |
2050 | #[inline ] |
2051 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2052 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2053 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
2054 | #[rustc_legacy_const_generics (2)] |
2055 | pub fn _mm_shrdi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
2056 | static_assert_uimm_bits!(IMM8, 8); |
2057 | _mm_shrdv_epi16(a, b, _mm_set1_epi16(IMM8 as i16)) |
2058 | } |
2059 | |
2060 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
2061 | /// |
2062 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi16&expand=5088) |
2063 | #[inline ] |
2064 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2065 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2066 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
2067 | #[rustc_legacy_const_generics (4)] |
2068 | pub fn _mm_mask_shrdi_epi16<const IMM8: i32>( |
2069 | src: __m128i, |
2070 | k: __mmask8, |
2071 | a: __m128i, |
2072 | b: __m128i, |
2073 | ) -> __m128i { |
2074 | unsafe { |
2075 | static_assert_uimm_bits!(IMM8, 8); |
2076 | let shf: i16x8 = _mm_shrdi_epi16::<IMM8>(a, b).as_i16x8(); |
2077 | transmute(simd_select_bitmask(k, shf, src.as_i16x8())) |
2078 | } |
2079 | } |
2080 | |
2081 | /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2082 | /// |
2083 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi16&expand=5089) |
2084 | #[inline ] |
2085 | #[target_feature (enable = "avx512vbmi2,avx512vl" )] |
2086 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2087 | #[cfg_attr (test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw |
2088 | #[rustc_legacy_const_generics (3)] |
2089 | pub fn _mm_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
2090 | unsafe { |
2091 | static_assert_uimm_bits!(IMM8, 8); |
2092 | let shf: i16x8 = _mm_shrdi_epi16::<IMM8>(a, b).as_i16x8(); |
2093 | transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) |
2094 | } |
2095 | } |
2096 | |
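// The immediate (`shrdi`) forms above splat `IMM8` into a vector and delegate to
// the corresponding variable-count (`shrdv`) forms, which lower to the LLVM
// `llvm.fshr.*` funnel-shift intrinsics declared below (the `shldv` counterparts
// use `llvm.fshl.*`).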
2097 | #[allow (improper_ctypes)] |
2098 | unsafe extern "C" { |
2099 | #[link_name = "llvm.x86.avx512.mask.compress.store.w.512" ] |
2100 | unsafe fn vcompressstorew(mem: *mut i8, data: i16x32, mask: u32); |
2101 | #[link_name = "llvm.x86.avx512.mask.compress.store.w.256" ] |
2102 | unsafe fn vcompressstorew256(mem: *mut i8, data: i16x16, mask: u16); |
2103 | #[link_name = "llvm.x86.avx512.mask.compress.store.w.128" ] |
2104 | unsafe fn vcompressstorew128(mem: *mut i8, data: i16x8, mask: u8); |
2105 | |
2106 | #[link_name = "llvm.x86.avx512.mask.compress.store.b.512" ] |
2107 | unsafe fn vcompressstoreb(mem: *mut i8, data: i8x64, mask: u64); |
2108 | #[link_name = "llvm.x86.avx512.mask.compress.store.b.256" ] |
2109 | unsafe fn vcompressstoreb256(mem: *mut i8, data: i8x32, mask: u32); |
2110 | #[link_name = "llvm.x86.avx512.mask.compress.store.b.128" ] |
2111 | unsafe fn vcompressstoreb128(mem: *mut i8, data: i8x16, mask: u16); |
2112 | |
2113 | #[link_name = "llvm.x86.avx512.mask.compress.w.512" ] |
2114 | unsafe fn vpcompressw(a: i16x32, src: i16x32, mask: u32) -> i16x32; |
2115 | #[link_name = "llvm.x86.avx512.mask.compress.w.256" ] |
2116 | unsafe fn vpcompressw256(a: i16x16, src: i16x16, mask: u16) -> i16x16; |
2117 | #[link_name = "llvm.x86.avx512.mask.compress.w.128" ] |
2118 | unsafe fn vpcompressw128(a: i16x8, src: i16x8, mask: u8) -> i16x8; |
2119 | |
2120 | #[link_name = "llvm.x86.avx512.mask.compress.b.512" ] |
2121 | unsafe fn vpcompressb(a: i8x64, src: i8x64, mask: u64) -> i8x64; |
2122 | #[link_name = "llvm.x86.avx512.mask.compress.b.256" ] |
2123 | unsafe fn vpcompressb256(a: i8x32, src: i8x32, mask: u32) -> i8x32; |
2124 | #[link_name = "llvm.x86.avx512.mask.compress.b.128" ] |
2125 | unsafe fn vpcompressb128(a: i8x16, src: i8x16, mask: u16) -> i8x16; |
2126 | |
2127 | #[link_name = "llvm.x86.avx512.mask.expand.w.512" ] |
2128 | unsafe fn vpexpandw(a: i16x32, src: i16x32, mask: u32) -> i16x32; |
2129 | #[link_name = "llvm.x86.avx512.mask.expand.w.256" ] |
2130 | unsafe fn vpexpandw256(a: i16x16, src: i16x16, mask: u16) -> i16x16; |
2131 | #[link_name = "llvm.x86.avx512.mask.expand.w.128" ] |
2132 | unsafe fn vpexpandw128(a: i16x8, src: i16x8, mask: u8) -> i16x8; |
2133 | |
2134 | #[link_name = "llvm.x86.avx512.mask.expand.b.512" ] |
2135 | unsafe fn vpexpandb(a: i8x64, src: i8x64, mask: u64) -> i8x64; |
2136 | #[link_name = "llvm.x86.avx512.mask.expand.b.256" ] |
2137 | unsafe fn vpexpandb256(a: i8x32, src: i8x32, mask: u32) -> i8x32; |
2138 | #[link_name = "llvm.x86.avx512.mask.expand.b.128" ] |
2139 | unsafe fn vpexpandb128(a: i8x16, src: i8x16, mask: u16) -> i8x16; |
2140 | |
2141 | #[link_name = "llvm.fshl.v8i64" ] |
2142 | unsafe fn vpshldvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8; |
2143 | #[link_name = "llvm.fshl.v4i64" ] |
2144 | unsafe fn vpshldvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4; |
2145 | #[link_name = "llvm.fshl.v2i64" ] |
2146 | unsafe fn vpshldvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2; |
2147 | #[link_name = "llvm.fshl.v16i32" ] |
2148 | unsafe fn vpshldvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16; |
2149 | #[link_name = "llvm.fshl.v8i32" ] |
2150 | unsafe fn vpshldvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8; |
2151 | #[link_name = "llvm.fshl.v4i32" ] |
2152 | unsafe fn vpshldvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4; |
2153 | #[link_name = "llvm.fshl.v32i16" ] |
2154 | unsafe fn vpshldvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32; |
2155 | #[link_name = "llvm.fshl.v16i16" ] |
2156 | unsafe fn vpshldvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16; |
2157 | #[link_name = "llvm.fshl.v8i16" ] |
2158 | unsafe fn vpshldvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8; |
2159 | |
2160 | #[link_name = "llvm.fshr.v8i64" ] |
2161 | unsafe fn vpshrdvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8; |
2162 | #[link_name = "llvm.fshr.v4i64" ] |
2163 | unsafe fn vpshrdvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4; |
2164 | #[link_name = "llvm.fshr.v2i64" ] |
2165 | unsafe fn vpshrdvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2; |
2166 | #[link_name = "llvm.fshr.v16i32" ] |
2167 | unsafe fn vpshrdvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16; |
2168 | #[link_name = "llvm.fshr.v8i32" ] |
2169 | unsafe fn vpshrdvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8; |
2170 | #[link_name = "llvm.fshr.v4i32" ] |
2171 | unsafe fn vpshrdvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4; |
2172 | #[link_name = "llvm.fshr.v32i16" ] |
2173 | unsafe fn vpshrdvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32; |
2174 | #[link_name = "llvm.fshr.v16i16" ] |
2175 | unsafe fn vpshrdvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16; |
2176 | #[link_name = "llvm.fshr.v8i16" ] |
2177 | unsafe fn vpshrdvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8; |
2178 | |
2179 | #[link_name = "llvm.x86.avx512.mask.expand.load.b.128" ] |
2180 | unsafe fn expandloadb_128(mem_addr: *const i8, a: i8x16, mask: u16) -> i8x16; |
2181 | #[link_name = "llvm.x86.avx512.mask.expand.load.w.128" ] |
2182 | unsafe fn expandloadw_128(mem_addr: *const i16, a: i16x8, mask: u8) -> i16x8; |
2183 | #[link_name = "llvm.x86.avx512.mask.expand.load.b.256" ] |
2184 | unsafe fn expandloadb_256(mem_addr: *const i8, a: i8x32, mask: u32) -> i8x32; |
2185 | #[link_name = "llvm.x86.avx512.mask.expand.load.w.256" ] |
2186 | unsafe fn expandloadw_256(mem_addr: *const i16, a: i16x16, mask: u16) -> i16x16; |
2187 | #[link_name = "llvm.x86.avx512.mask.expand.load.b.512" ] |
2188 | unsafe fn expandloadb_512(mem_addr: *const i8, a: i8x64, mask: u64) -> i8x64; |
2189 | #[link_name = "llvm.x86.avx512.mask.expand.load.w.512" ] |
2190 | unsafe fn expandloadw_512(mem_addr: *const i16, a: i16x32, mask: u32) -> i16x32; |
2191 | } |
2192 | |
2193 | #[cfg (test)] |
2194 | mod tests { |
2195 | |
2196 | use stdarch_test::simd_test; |
2197 | |
2198 | use crate::core_arch::x86::*; |
2199 | use crate::hint::black_box; |
2200 | |
2201 | #[simd_test(enable = "avx512vbmi2" )] |
2202 | unsafe fn test_mm512_mask_compress_epi16() { |
2203 | let src = _mm512_set1_epi16(200); |
2204 | #[rustfmt::skip] |
2205 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2206 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
2207 | let r = _mm512_mask_compress_epi16(src, 0b01010101_01010101_01010101_01010101, a); |
2208 | #[rustfmt::skip] |
2209 | let e = _mm512_set_epi16( |
2210 | 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, |
2211 | 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, |
2212 | ); |
2213 | assert_eq_m512i(r, e); |
2214 | } |
2215 | |
2216 | #[simd_test(enable = "avx512vbmi2" )] |
2217 | unsafe fn test_mm512_maskz_compress_epi16() { |
2218 | #[rustfmt::skip] |
2219 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2220 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
2221 | let r = _mm512_maskz_compress_epi16(0b01010101_01010101_01010101_01010101, a); |
2222 | #[rustfmt::skip] |
2223 | let e = _mm512_set_epi16( |
2224 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2225 | 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, |
2226 | ); |
2227 | assert_eq_m512i(r, e); |
2228 | } |
2229 | |
2230 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2231 | unsafe fn test_mm256_mask_compress_epi16() { |
2232 | let src = _mm256_set1_epi16(200); |
2233 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
2234 | let r = _mm256_mask_compress_epi16(src, 0b01010101_01010101, a); |
2235 | let e = _mm256_set_epi16( |
2236 | 200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15, |
2237 | ); |
2238 | assert_eq_m256i(r, e); |
2239 | } |
2240 | |
2241 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2242 | unsafe fn test_mm256_maskz_compress_epi16() { |
2243 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
2244 | let r = _mm256_maskz_compress_epi16(0b01010101_01010101, a); |
2245 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15); |
2246 | assert_eq_m256i(r, e); |
2247 | } |
2248 | |
2249 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2250 | unsafe fn test_mm_mask_compress_epi16() { |
2251 | let src = _mm_set1_epi16(200); |
2252 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
2253 | let r = _mm_mask_compress_epi16(src, 0b01010101, a); |
2254 | let e = _mm_set_epi16(200, 200, 200, 200, 1, 3, 5, 7); |
2255 | assert_eq_m128i(r, e); |
2256 | } |
2257 | |
2258 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2259 | unsafe fn test_mm_maskz_compress_epi16() { |
2260 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
2261 | let r = _mm_maskz_compress_epi16(0b01010101, a); |
2262 | let e = _mm_set_epi16(0, 0, 0, 0, 1, 3, 5, 7); |
2263 | assert_eq_m128i(r, e); |
2264 | } |
2265 | |
2266 | #[simd_test(enable = "avx512vbmi2" )] |
2267 | unsafe fn test_mm512_mask_compress_epi8() { |
2268 | let src = _mm512_set1_epi8(100); |
2269 | #[rustfmt::skip] |
2270 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2271 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
2272 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
2273 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
2274 | let r = _mm512_mask_compress_epi8( |
2275 | src, |
2276 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101, |
2277 | a, |
2278 | ); |
2279 | #[rustfmt::skip] |
2280 | let e = _mm512_set_epi8( |
2281 | 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, |
2282 | 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, |
2283 | 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, |
2284 | 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, |
2285 | ); |
2286 | assert_eq_m512i(r, e); |
2287 | } |
2288 | |
2289 | #[simd_test(enable = "avx512vbmi2" )] |
2290 | unsafe fn test_mm512_maskz_compress_epi8() { |
2291 | #[rustfmt::skip] |
2292 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2293 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
2294 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
2295 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
2296 | let r = _mm512_maskz_compress_epi8( |
2297 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101, |
2298 | a, |
2299 | ); |
2300 | #[rustfmt::skip] |
2301 | let e = _mm512_set_epi8( |
2302 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2303 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2304 | 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, |
2305 | 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, |
2306 | ); |
2307 | assert_eq_m512i(r, e); |
2308 | } |
2309 | |
2310 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2311 | unsafe fn test_mm256_mask_compress_epi8() { |
2312 | let src = _mm256_set1_epi8(100); |
2313 | #[rustfmt::skip] |
2314 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2315 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
2316 | let r = _mm256_mask_compress_epi8(src, 0b01010101_01010101_01010101_01010101, a); |
2317 | #[rustfmt::skip] |
2318 | let e = _mm256_set_epi8( |
2319 | 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, |
2320 | 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, |
2321 | ); |
2322 | assert_eq_m256i(r, e); |
2323 | } |
2324 | |
2325 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2326 | unsafe fn test_mm256_maskz_compress_epi8() { |
2327 | #[rustfmt::skip] |
2328 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2329 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
2330 | let r = _mm256_maskz_compress_epi8(0b01010101_01010101_01010101_01010101, a); |
2331 | #[rustfmt::skip] |
2332 | let e = _mm256_set_epi8( |
2333 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2334 | 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, |
2335 | ); |
2336 | assert_eq_m256i(r, e); |
2337 | } |
2338 | |
2339 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2340 | unsafe fn test_mm_mask_compress_epi8() { |
2341 | let src = _mm_set1_epi8(100); |
2342 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
2343 | let r = _mm_mask_compress_epi8(src, 0b01010101_01010101, a); |
2344 | let e = _mm_set_epi8( |
2345 | 100, 100, 100, 100, 100, 100, 100, 100, 1, 3, 5, 7, 9, 11, 13, 15, |
2346 | ); |
2347 | assert_eq_m128i(r, e); |
2348 | } |
2349 | |
2350 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2351 | unsafe fn test_mm_maskz_compress_epi8() { |
2352 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
2353 | let r = _mm_maskz_compress_epi8(0b01010101_01010101, a); |
2354 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15); |
2355 | assert_eq_m128i(r, e); |
2356 | } |
2357 | |
2358 | #[simd_test(enable = "avx512vbmi2" )] |
2359 | unsafe fn test_mm512_mask_expand_epi16() { |
2360 | let src = _mm512_set1_epi16(200); |
2361 | #[rustfmt::skip] |
2362 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2363 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
2364 | let r = _mm512_mask_expand_epi16(src, 0b01010101_01010101_01010101_01010101, a); |
2365 | #[rustfmt::skip] |
2366 | let e = _mm512_set_epi16( |
2367 | 200, 16, 200, 17, 200, 18, 200, 19, 200, 20, 200, 21, 200, 22, 200, 23, |
2368 | 200, 24, 200, 25, 200, 26, 200, 27, 200, 28, 200, 29, 200, 30, 200, 31, |
2369 | ); |
2370 | assert_eq_m512i(r, e); |
2371 | } |
2372 | |
2373 | #[simd_test(enable = "avx512vbmi2" )] |
2374 | unsafe fn test_mm512_maskz_expand_epi16() { |
2375 | #[rustfmt::skip] |
2376 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2377 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
2378 | let r = _mm512_maskz_expand_epi16(0b01010101_01010101_01010101_01010101, a); |
2379 | #[rustfmt::skip] |
2380 | let e = _mm512_set_epi16(0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23, |
2381 | 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31); |
2382 | assert_eq_m512i(r, e); |
2383 | } |
2384 | |
2385 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2386 | unsafe fn test_mm256_mask_expand_epi16() { |
2387 | let src = _mm256_set1_epi16(200); |
2388 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
2389 | let r = _mm256_mask_expand_epi16(src, 0b01010101_01010101, a); |
2390 | let e = _mm256_set_epi16( |
2391 | 200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15, |
2392 | ); |
2393 | assert_eq_m256i(r, e); |
2394 | } |
2395 | |
2396 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2397 | unsafe fn test_mm256_maskz_expand_epi16() { |
2398 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
2399 | let r = _mm256_maskz_expand_epi16(0b01010101_01010101, a); |
2400 | let e = _mm256_set_epi16(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15); |
2401 | assert_eq_m256i(r, e); |
2402 | } |
2403 | |
2404 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2405 | unsafe fn test_mm_mask_expand_epi16() { |
2406 | let src = _mm_set1_epi16(200); |
2407 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
2408 | let r = _mm_mask_expand_epi16(src, 0b01010101, a); |
2409 | let e = _mm_set_epi16(200, 4, 200, 5, 200, 6, 200, 7); |
2410 | assert_eq_m128i(r, e); |
2411 | } |
2412 | |
2413 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2414 | unsafe fn test_mm_maskz_expand_epi16() { |
2415 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
2416 | let r = _mm_maskz_expand_epi16(0b01010101, a); |
2417 | let e = _mm_set_epi16(0, 4, 0, 5, 0, 6, 0, 7); |
2418 | assert_eq_m128i(r, e); |
2419 | } |
2420 | |
2421 | #[simd_test(enable = "avx512vbmi2" )] |
2422 | unsafe fn test_mm512_mask_expand_epi8() { |
2423 | let src = _mm512_set1_epi8(100); |
2424 | #[rustfmt::skip] |
2425 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2426 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
2427 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
2428 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
2429 | let r = _mm512_mask_expand_epi8( |
2430 | src, |
2431 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101, |
2432 | a, |
2433 | ); |
2434 | #[rustfmt::skip] |
2435 | let e = _mm512_set_epi8( |
2436 | 100, 32, 100, 33, 100, 34, 100, 35, 100, 36, 100, 37, 100, 38, 100, 39, |
2437 | 100, 40, 100, 41, 100, 42, 100, 43, 100, 44, 100, 45, 100, 46, 100, 47, |
2438 | 100, 48, 100, 49, 100, 50, 100, 51, 100, 52, 100, 53, 100, 54, 100, 55, |
2439 | 100, 56, 100, 57, 100, 58, 100, 59, 100, 60, 100, 61, 100, 62, 100, 63, |
2440 | ); |
2441 | assert_eq_m512i(r, e); |
2442 | } |
2443 | |
2444 | #[simd_test(enable = "avx512vbmi2" )] |
2445 | unsafe fn test_mm512_maskz_expand_epi8() { |
2446 | #[rustfmt::skip] |
2447 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2448 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
2449 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
2450 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
2451 | let r = _mm512_maskz_expand_epi8( |
2452 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101, |
2453 | a, |
2454 | ); |
2455 | #[rustfmt::skip] |
2456 | let e = _mm512_set_epi8( |
2457 | 0, 32, 0, 33, 0, 34, 0, 35, 0, 36, 0, 37, 0, 38, 0, 39, |
2458 | 0, 40, 0, 41, 0, 42, 0, 43, 0, 44, 0, 45, 0, 46, 0, 47, |
2459 | 0, 48, 0, 49, 0, 50, 0, 51, 0, 52, 0, 53, 0, 54, 0, 55, |
2460 | 0, 56, 0, 57, 0, 58, 0, 59, 0, 60, 0, 61, 0, 62, 0, 63, |
2461 | ); |
2462 | assert_eq_m512i(r, e); |
2463 | } |
2464 | |
2465 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2466 | unsafe fn test_mm256_mask_expand_epi8() { |
2467 | let src = _mm256_set1_epi8(100); |
2468 | #[rustfmt::skip] |
2469 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2470 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
2471 | let r = _mm256_mask_expand_epi8(src, 0b01010101_01010101_01010101_01010101, a); |
2472 | #[rustfmt::skip] |
2473 | let e = _mm256_set_epi8( |
2474 | 100, 16, 100, 17, 100, 18, 100, 19, 100, 20, 100, 21, 100, 22, 100, 23, |
2475 | 100, 24, 100, 25, 100, 26, 100, 27, 100, 28, 100, 29, 100, 30, 100, 31, |
2476 | ); |
2477 | assert_eq_m256i(r, e); |
2478 | } |
2479 | |
2480 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2481 | unsafe fn test_mm256_maskz_expand_epi8() { |
2482 | #[rustfmt::skip] |
2483 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2484 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
2485 | let r = _mm256_maskz_expand_epi8(0b01010101_01010101_01010101_01010101, a); |
2486 | #[rustfmt::skip] |
2487 | let e = _mm256_set_epi8( |
2488 | 0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23, |
2489 | 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31, |
2490 | ); |
2491 | assert_eq_m256i(r, e); |
2492 | } |
2493 | |
2494 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2495 | unsafe fn test_mm_mask_expand_epi8() { |
2496 | let src = _mm_set1_epi8(100); |
2497 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
2498 | let r = _mm_mask_expand_epi8(src, 0b01010101_01010101, a); |
2499 | let e = _mm_set_epi8( |
2500 | 100, 8, 100, 9, 100, 10, 100, 11, 100, 12, 100, 13, 100, 14, 100, 15, |
2501 | ); |
2502 | assert_eq_m128i(r, e); |
2503 | } |
2504 | |
2505 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2506 | unsafe fn test_mm_maskz_expand_epi8() { |
2507 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
2508 | let r = _mm_maskz_expand_epi8(0b01010101_01010101, a); |
2509 | let e = _mm_set_epi8(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15); |
2510 | assert_eq_m128i(r, e); |
2511 | } |
2512 | |
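// In the shldv tests below, each lane ends up holding the upper half of
// (a:b) << 2: with a = 1 and b = 1 << (lane_bits - 1), the concatenation is
// binary 11 followed by lane_bits - 1 zeros, so a left shift by two leaves 6
// in the upper half.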
2513 | #[simd_test(enable = "avx512vbmi2" )] |
2514 | unsafe fn test_mm512_shldv_epi64() { |
2515 | let a = _mm512_set1_epi64(1); |
2516 | let b = _mm512_set1_epi64(1 << 63); |
2517 | let c = _mm512_set1_epi64(2); |
2518 | let r = _mm512_shldv_epi64(a, b, c); |
2519 | let e = _mm512_set1_epi64(6); |
2520 | assert_eq_m512i(r, e); |
2521 | } |
2522 | |
2523 | #[simd_test(enable = "avx512vbmi2" )] |
2524 | unsafe fn test_mm512_mask_shldv_epi64() { |
2525 | let a = _mm512_set1_epi64(1); |
2526 | let b = _mm512_set1_epi64(1 << 63); |
2527 | let c = _mm512_set1_epi64(2); |
2528 | let r = _mm512_mask_shldv_epi64(a, 0, b, c); |
2529 | assert_eq_m512i(r, a); |
2530 | let r = _mm512_mask_shldv_epi64(a, 0b11111111, b, c); |
2531 | let e = _mm512_set1_epi64(6); |
2532 | assert_eq_m512i(r, e); |
2533 | } |
2534 | |
2535 | #[simd_test(enable = "avx512vbmi2" )] |
2536 | unsafe fn test_mm512_maskz_shldv_epi64() { |
2537 | let a = _mm512_set1_epi64(1); |
2538 | let b = _mm512_set1_epi64(1 << 63); |
2539 | let c = _mm512_set1_epi64(2); |
2540 | let r = _mm512_maskz_shldv_epi64(0, a, b, c); |
2541 | assert_eq_m512i(r, _mm512_setzero_si512()); |
2542 | let r = _mm512_maskz_shldv_epi64(0b11111111, a, b, c); |
2543 | let e = _mm512_set1_epi64(6); |
2544 | assert_eq_m512i(r, e); |
2545 | } |
2546 | |
2547 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2548 | unsafe fn test_mm256_shldv_epi64() { |
2549 | let a = _mm256_set1_epi64x(1); |
2550 | let b = _mm256_set1_epi64x(1 << 63); |
2551 | let c = _mm256_set1_epi64x(2); |
2552 | let r = _mm256_shldv_epi64(a, b, c); |
2553 | let e = _mm256_set1_epi64x(6); |
2554 | assert_eq_m256i(r, e); |
2555 | } |
2556 | |
2557 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2558 | unsafe fn test_mm256_mask_shldv_epi64() { |
2559 | let a = _mm256_set1_epi64x(1); |
2560 | let b = _mm256_set1_epi64x(1 << 63); |
2561 | let c = _mm256_set1_epi64x(2); |
2562 | let r = _mm256_mask_shldv_epi64(a, 0, b, c); |
2563 | assert_eq_m256i(r, a); |
2564 | let r = _mm256_mask_shldv_epi64(a, 0b00001111, b, c); |
2565 | let e = _mm256_set1_epi64x(6); |
2566 | assert_eq_m256i(r, e); |
2567 | } |
2568 | |
2569 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2570 | unsafe fn test_mm256_maskz_shldv_epi64() { |
2571 | let a = _mm256_set1_epi64x(1); |
2572 | let b = _mm256_set1_epi64x(1 << 63); |
2573 | let c = _mm256_set1_epi64x(2); |
2574 | let r = _mm256_maskz_shldv_epi64(0, a, b, c); |
2575 | assert_eq_m256i(r, _mm256_setzero_si256()); |
2576 | let r = _mm256_maskz_shldv_epi64(0b00001111, a, b, c); |
2577 | let e = _mm256_set1_epi64x(6); |
2578 | assert_eq_m256i(r, e); |
2579 | } |
2580 | |
2581 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2582 | unsafe fn test_mm_shldv_epi64() { |
2583 | let a = _mm_set1_epi64x(1); |
2584 | let b = _mm_set1_epi64x(1 << 63); |
2585 | let c = _mm_set1_epi64x(2); |
2586 | let r = _mm_shldv_epi64(a, b, c); |
2587 | let e = _mm_set1_epi64x(6); |
2588 | assert_eq_m128i(r, e); |
2589 | } |
2590 | |
2591 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2592 | unsafe fn test_mm_mask_shldv_epi64() { |
2593 | let a = _mm_set1_epi64x(1); |
2594 | let b = _mm_set1_epi64x(1 << 63); |
2595 | let c = _mm_set1_epi64x(2); |
2596 | let r = _mm_mask_shldv_epi64(a, 0, b, c); |
2597 | assert_eq_m128i(r, a); |
2598 | let r = _mm_mask_shldv_epi64(a, 0b00000011, b, c); |
2599 | let e = _mm_set1_epi64x(6); |
2600 | assert_eq_m128i(r, e); |
2601 | } |
2602 | |
2603 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2604 | unsafe fn test_mm_maskz_shldv_epi64() { |
2605 | let a = _mm_set1_epi64x(1); |
2606 | let b = _mm_set1_epi64x(1 << 63); |
2607 | let c = _mm_set1_epi64x(2); |
2608 | let r = _mm_maskz_shldv_epi64(0, a, b, c); |
2609 | assert_eq_m128i(r, _mm_setzero_si128()); |
2610 | let r = _mm_maskz_shldv_epi64(0b00000011, a, b, c); |
2611 | let e = _mm_set1_epi64x(6); |
2612 | assert_eq_m128i(r, e); |
2613 | } |
2614 | |
2615 | #[simd_test(enable = "avx512vbmi2" )] |
2616 | unsafe fn test_mm512_shldv_epi32() { |
2617 | let a = _mm512_set1_epi32(1); |
2618 | let b = _mm512_set1_epi32(1 << 31); |
2619 | let c = _mm512_set1_epi32(2); |
2620 | let r = _mm512_shldv_epi32(a, b, c); |
2621 | let e = _mm512_set1_epi32(6); |
2622 | assert_eq_m512i(r, e); |
2623 | } |
2624 | |
2625 | #[simd_test(enable = "avx512vbmi2" )] |
2626 | unsafe fn test_mm512_mask_shldv_epi32() { |
2627 | let a = _mm512_set1_epi32(1); |
2628 | let b = _mm512_set1_epi32(1 << 31); |
2629 | let c = _mm512_set1_epi32(2); |
2630 | let r = _mm512_mask_shldv_epi32(a, 0, b, c); |
2631 | assert_eq_m512i(r, a); |
2632 | let r = _mm512_mask_shldv_epi32(a, 0b11111111_11111111, b, c); |
2633 | let e = _mm512_set1_epi32(6); |
2634 | assert_eq_m512i(r, e); |
2635 | } |
2636 | |
2637 | #[simd_test(enable = "avx512vbmi2" )] |
2638 | unsafe fn test_mm512_maskz_shldv_epi32() { |
2639 | let a = _mm512_set1_epi32(1); |
2640 | let b = _mm512_set1_epi32(1 << 31); |
2641 | let c = _mm512_set1_epi32(2); |
2642 | let r = _mm512_maskz_shldv_epi32(0, a, b, c); |
2643 | assert_eq_m512i(r, _mm512_setzero_si512()); |
2644 | let r = _mm512_maskz_shldv_epi32(0b11111111_11111111, a, b, c); |
2645 | let e = _mm512_set1_epi32(6); |
2646 | assert_eq_m512i(r, e); |
2647 | } |
2648 | |
2649 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2650 | unsafe fn test_mm256_shldv_epi32() { |
2651 | let a = _mm256_set1_epi32(1); |
2652 | let b = _mm256_set1_epi32(1 << 31); |
2653 | let c = _mm256_set1_epi32(2); |
2654 | let r = _mm256_shldv_epi32(a, b, c); |
2655 | let e = _mm256_set1_epi32(6); |
2656 | assert_eq_m256i(r, e); |
2657 | } |
2658 | |
2659 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2660 | unsafe fn test_mm256_mask_shldv_epi32() { |
2661 | let a = _mm256_set1_epi32(1); |
2662 | let b = _mm256_set1_epi32(1 << 31); |
2663 | let c = _mm256_set1_epi32(2); |
2664 | let r = _mm256_mask_shldv_epi32(a, 0, b, c); |
2665 | assert_eq_m256i(r, a); |
2666 | let r = _mm256_mask_shldv_epi32(a, 0b11111111, b, c); |
2667 | let e = _mm256_set1_epi32(6); |
2668 | assert_eq_m256i(r, e); |
2669 | } |
2670 | |
2671 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2672 | unsafe fn test_mm256_maskz_shldv_epi32() { |
2673 | let a = _mm256_set1_epi32(1); |
2674 | let b = _mm256_set1_epi32(1 << 31); |
2675 | let c = _mm256_set1_epi32(2); |
2676 | let r = _mm256_maskz_shldv_epi32(0, a, b, c); |
2677 | assert_eq_m256i(r, _mm256_setzero_si256()); |
2678 | let r = _mm256_maskz_shldv_epi32(0b11111111, a, b, c); |
2679 | let e = _mm256_set1_epi32(6); |
2680 | assert_eq_m256i(r, e); |
2681 | } |
2682 | |
2683 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2684 | unsafe fn test_mm_shldv_epi32() { |
2685 | let a = _mm_set1_epi32(1); |
2686 | let b = _mm_set1_epi32(1 << 31); |
2687 | let c = _mm_set1_epi32(2); |
2688 | let r = _mm_shldv_epi32(a, b, c); |
2689 | let e = _mm_set1_epi32(6); |
2690 | assert_eq_m128i(r, e); |
2691 | } |
2692 | |
2693 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2694 | unsafe fn test_mm_mask_shldv_epi32() { |
2695 | let a = _mm_set1_epi32(1); |
2696 | let b = _mm_set1_epi32(1 << 31); |
2697 | let c = _mm_set1_epi32(2); |
2698 | let r = _mm_mask_shldv_epi32(a, 0, b, c); |
2699 | assert_eq_m128i(r, a); |
2700 | let r = _mm_mask_shldv_epi32(a, 0b00001111, b, c); |
2701 | let e = _mm_set1_epi32(6); |
2702 | assert_eq_m128i(r, e); |
2703 | } |
2704 | |
2705 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2706 | unsafe fn test_mm_maskz_shldv_epi32() { |
2707 | let a = _mm_set1_epi32(1); |
2708 | let b = _mm_set1_epi32(1 << 31); |
2709 | let c = _mm_set1_epi32(2); |
2710 | let r = _mm_maskz_shldv_epi32(0, a, b, c); |
2711 | assert_eq_m128i(r, _mm_setzero_si128()); |
2712 | let r = _mm_maskz_shldv_epi32(0b00001111, a, b, c); |
2713 | let e = _mm_set1_epi32(6); |
2714 | assert_eq_m128i(r, e); |
2715 | } |
2716 | |
2717 | #[simd_test(enable = "avx512vbmi2" )] |
2718 | unsafe fn test_mm512_shldv_epi16() { |
2719 | let a = _mm512_set1_epi16(1); |
2720 | let b = _mm512_set1_epi16(1 << 15); |
2721 | let c = _mm512_set1_epi16(2); |
2722 | let r = _mm512_shldv_epi16(a, b, c); |
2723 | let e = _mm512_set1_epi16(6); |
2724 | assert_eq_m512i(r, e); |
2725 | } |
2726 | |
2727 | #[simd_test(enable = "avx512vbmi2" )] |
2728 | unsafe fn test_mm512_mask_shldv_epi16() { |
2729 | let a = _mm512_set1_epi16(1); |
2730 | let b = _mm512_set1_epi16(1 << 15); |
2731 | let c = _mm512_set1_epi16(2); |
2732 | let r = _mm512_mask_shldv_epi16(a, 0, b, c); |
2733 | assert_eq_m512i(r, a); |
2734 | let r = _mm512_mask_shldv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c); |
2735 | let e = _mm512_set1_epi16(6); |
2736 | assert_eq_m512i(r, e); |
2737 | } |
2738 | |
2739 | #[simd_test(enable = "avx512vbmi2" )] |
2740 | unsafe fn test_mm512_maskz_shldv_epi16() { |
2741 | let a = _mm512_set1_epi16(1); |
2742 | let b = _mm512_set1_epi16(1 << 15); |
2743 | let c = _mm512_set1_epi16(2); |
2744 | let r = _mm512_maskz_shldv_epi16(0, a, b, c); |
2745 | assert_eq_m512i(r, _mm512_setzero_si512()); |
2746 | let r = _mm512_maskz_shldv_epi16(0b11111111_11111111_11111111_11111111, a, b, c); |
2747 | let e = _mm512_set1_epi16(6); |
2748 | assert_eq_m512i(r, e); |
2749 | } |
2750 | |
2751 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2752 | unsafe fn test_mm256_shldv_epi16() { |
2753 | let a = _mm256_set1_epi16(1); |
2754 | let b = _mm256_set1_epi16(1 << 15); |
2755 | let c = _mm256_set1_epi16(2); |
2756 | let r = _mm256_shldv_epi16(a, b, c); |
2757 | let e = _mm256_set1_epi16(6); |
2758 | assert_eq_m256i(r, e); |
2759 | } |
2760 | |
2761 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2762 | unsafe fn test_mm256_mask_shldv_epi16() { |
2763 | let a = _mm256_set1_epi16(1); |
2764 | let b = _mm256_set1_epi16(1 << 15); |
2765 | let c = _mm256_set1_epi16(2); |
2766 | let r = _mm256_mask_shldv_epi16(a, 0, b, c); |
2767 | assert_eq_m256i(r, a); |
2768 | let r = _mm256_mask_shldv_epi16(a, 0b11111111_11111111, b, c); |
2769 | let e = _mm256_set1_epi16(6); |
2770 | assert_eq_m256i(r, e); |
2771 | } |
2772 | |
2773 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2774 | unsafe fn test_mm256_maskz_shldv_epi16() { |
2775 | let a = _mm256_set1_epi16(1); |
2776 | let b = _mm256_set1_epi16(1 << 15); |
2777 | let c = _mm256_set1_epi16(2); |
2778 | let r = _mm256_maskz_shldv_epi16(0, a, b, c); |
2779 | assert_eq_m256i(r, _mm256_setzero_si256()); |
2780 | let r = _mm256_maskz_shldv_epi16(0b11111111_11111111, a, b, c); |
2781 | let e = _mm256_set1_epi16(6); |
2782 | assert_eq_m256i(r, e); |
2783 | } |
2784 | |
2785 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2786 | unsafe fn test_mm_shldv_epi16() { |
2787 | let a = _mm_set1_epi16(1); |
2788 | let b = _mm_set1_epi16(1 << 15); |
2789 | let c = _mm_set1_epi16(2); |
2790 | let r = _mm_shldv_epi16(a, b, c); |
2791 | let e = _mm_set1_epi16(6); |
2792 | assert_eq_m128i(r, e); |
2793 | } |
2794 | |
2795 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2796 | unsafe fn test_mm_mask_shldv_epi16() { |
2797 | let a = _mm_set1_epi16(1); |
2798 | let b = _mm_set1_epi16(1 << 15); |
2799 | let c = _mm_set1_epi16(2); |
2800 | let r = _mm_mask_shldv_epi16(a, 0, b, c); |
2801 | assert_eq_m128i(r, a); |
2802 | let r = _mm_mask_shldv_epi16(a, 0b11111111, b, c); |
2803 | let e = _mm_set1_epi16(6); |
2804 | assert_eq_m128i(r, e); |
2805 | } |
2806 | |
2807 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2808 | unsafe fn test_mm_maskz_shldv_epi16() { |
2809 | let a = _mm_set1_epi16(1); |
2810 | let b = _mm_set1_epi16(1 << 15); |
2811 | let c = _mm_set1_epi16(2); |
2812 | let r = _mm_maskz_shldv_epi16(0, a, b, c); |
2813 | assert_eq_m128i(r, _mm_setzero_si128()); |
2814 | let r = _mm_maskz_shldv_epi16(0b11111111, a, b, c); |
2815 | let e = _mm_set1_epi16(6); |
2816 | assert_eq_m128i(r, e); |
2817 | } |
2818 | |
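// In the shrdv tests below, each lane ends up holding the lower half of
// (b:a) >> 1: with b = 8 and a = 2, shifting the concatenation right once
// leaves 1 in the lower half (the halved b stays in the discarded upper half).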
2819 | #[simd_test(enable = "avx512vbmi2" )] |
2820 | unsafe fn test_mm512_shrdv_epi64() { |
2821 | let a = _mm512_set1_epi64(2); |
2822 | let b = _mm512_set1_epi64(8); |
2823 | let c = _mm512_set1_epi64(1); |
2824 | let r = _mm512_shrdv_epi64(a, b, c); |
2825 | let e = _mm512_set1_epi64(1); |
2826 | assert_eq_m512i(r, e); |
2827 | } |
2828 | |
2829 | #[simd_test(enable = "avx512vbmi2" )] |
2830 | unsafe fn test_mm512_mask_shrdv_epi64() { |
2831 | let a = _mm512_set1_epi64(2); |
2832 | let b = _mm512_set1_epi64(8); |
2833 | let c = _mm512_set1_epi64(1); |
2834 | let r = _mm512_mask_shrdv_epi64(a, 0, b, c); |
2835 | assert_eq_m512i(r, a); |
2836 | let r = _mm512_mask_shrdv_epi64(a, 0b11111111, b, c); |
2837 | let e = _mm512_set1_epi64(1); |
2838 | assert_eq_m512i(r, e); |
2839 | } |
2840 | |
2841 | #[simd_test(enable = "avx512vbmi2" )] |
2842 | unsafe fn test_mm512_maskz_shrdv_epi64() { |
2843 | let a = _mm512_set1_epi64(2); |
2844 | let b = _mm512_set1_epi64(8); |
2845 | let c = _mm512_set1_epi64(1); |
2846 | let r = _mm512_maskz_shrdv_epi64(0, a, b, c); |
2847 | assert_eq_m512i(r, _mm512_setzero_si512()); |
2848 | let r = _mm512_maskz_shrdv_epi64(0b11111111, a, b, c); |
2849 | let e = _mm512_set1_epi64(1); |
2850 | assert_eq_m512i(r, e); |
2851 | } |
2852 | |
2853 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2854 | unsafe fn test_mm256_shrdv_epi64() { |
2855 | let a = _mm256_set1_epi64x(2); |
2856 | let b = _mm256_set1_epi64x(8); |
2857 | let c = _mm256_set1_epi64x(1); |
2858 | let r = _mm256_shrdv_epi64(a, b, c); |
2859 | let e = _mm256_set1_epi64x(1); |
2860 | assert_eq_m256i(r, e); |
2861 | } |
2862 | |
2863 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2864 | unsafe fn test_mm256_mask_shrdv_epi64() { |
2865 | let a = _mm256_set1_epi64x(2); |
2866 | let b = _mm256_set1_epi64x(8); |
2867 | let c = _mm256_set1_epi64x(1); |
2868 | let r = _mm256_mask_shrdv_epi64(a, 0, b, c); |
2869 | assert_eq_m256i(r, a); |
2870 | let r = _mm256_mask_shrdv_epi64(a, 0b00001111, b, c); |
2871 | let e = _mm256_set1_epi64x(1); |
2872 | assert_eq_m256i(r, e); |
2873 | } |
2874 | |
2875 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2876 | unsafe fn test_mm256_maskz_shrdv_epi64() { |
2877 | let a = _mm256_set1_epi64x(2); |
2878 | let b = _mm256_set1_epi64x(8); |
2879 | let c = _mm256_set1_epi64x(1); |
2880 | let r = _mm256_maskz_shrdv_epi64(0, a, b, c); |
2881 | assert_eq_m256i(r, _mm256_setzero_si256()); |
2882 | let r = _mm256_maskz_shrdv_epi64(0b00001111, a, b, c); |
2883 | let e = _mm256_set1_epi64x(1); |
2884 | assert_eq_m256i(r, e); |
2885 | } |
2886 | |
2887 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2888 | unsafe fn test_mm_shrdv_epi64() { |
2889 | let a = _mm_set1_epi64x(2); |
2890 | let b = _mm_set1_epi64x(8); |
2891 | let c = _mm_set1_epi64x(1); |
2892 | let r = _mm_shrdv_epi64(a, b, c); |
2893 | let e = _mm_set1_epi64x(1); |
2894 | assert_eq_m128i(r, e); |
2895 | } |
2896 | |
2897 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2898 | unsafe fn test_mm_mask_shrdv_epi64() { |
2899 | let a = _mm_set1_epi64x(2); |
2900 | let b = _mm_set1_epi64x(8); |
2901 | let c = _mm_set1_epi64x(1); |
2902 | let r = _mm_mask_shrdv_epi64(a, 0, b, c); |
2903 | assert_eq_m128i(r, a); |
2904 | let r = _mm_mask_shrdv_epi64(a, 0b00000011, b, c); |
2905 | let e = _mm_set1_epi64x(1); |
2906 | assert_eq_m128i(r, e); |
2907 | } |
2908 | |
2909 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2910 | unsafe fn test_mm_maskz_shrdv_epi64() { |
2911 | let a = _mm_set1_epi64x(2); |
2912 | let b = _mm_set1_epi64x(8); |
2913 | let c = _mm_set1_epi64x(1); |
2914 | let r = _mm_maskz_shrdv_epi64(0, a, b, c); |
2915 | assert_eq_m128i(r, _mm_setzero_si128()); |
2916 | let r = _mm_maskz_shrdv_epi64(0b00000011, a, b, c); |
2917 | let e = _mm_set1_epi64x(1); |
2918 | assert_eq_m128i(r, e); |
2919 | } |
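
// A sketch cross-check added for illustration (the test name and values are chosen
// here and are not from the original suite): the immediate form _mm512_shrdi_epi64
// should agree with the variable form fed a splatted count.
#[simd_test(enable = "avx512vbmi2")]
unsafe fn test_mm512_shrdi_epi64_matches_shrdv_sketch() {
    let a = _mm512_set1_epi64(2);
    let b = _mm512_set1_epi64(8);
    let r_imm = _mm512_shrdi_epi64::<1>(a, b);
    let r_var = _mm512_shrdv_epi64(a, b, _mm512_set1_epi64(1));
    assert_eq_m512i(r_imm, r_var);
    // Both compute ((b:a) >> 1) per 64-bit lane, which is 1 for these inputs.
    assert_eq_m512i(r_imm, _mm512_set1_epi64(1));
}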
2920 | |
2921 | #[simd_test(enable = "avx512vbmi2" )] |
2922 | unsafe fn test_mm512_shrdv_epi32() { |
2923 | let a = _mm512_set1_epi32(2); |
2924 | let b = _mm512_set1_epi32(8); |
2925 | let c = _mm512_set1_epi32(1); |
2926 | let r = _mm512_shrdv_epi32(a, b, c); |
2927 | let e = _mm512_set1_epi32(1); |
2928 | assert_eq_m512i(r, e); |
2929 | } |
2930 | |
2931 | #[simd_test(enable = "avx512vbmi2" )] |
2932 | unsafe fn test_mm512_mask_shrdv_epi32() { |
2933 | let a = _mm512_set1_epi32(2); |
2934 | let b = _mm512_set1_epi32(8); |
2935 | let c = _mm512_set1_epi32(1); |
2936 | let r = _mm512_mask_shrdv_epi32(a, 0, b, c); |
2937 | assert_eq_m512i(r, a); |
2938 | let r = _mm512_mask_shrdv_epi32(a, 0b11111111_11111111, b, c); |
2939 | let e = _mm512_set1_epi32(1); |
2940 | assert_eq_m512i(r, e); |
2941 | } |
2942 | |
2943 | #[simd_test(enable = "avx512vbmi2" )] |
2944 | unsafe fn test_mm512_maskz_shrdv_epi32() { |
2945 | let a = _mm512_set1_epi32(2); |
2946 | let b = _mm512_set1_epi32(8); |
2947 | let c = _mm512_set1_epi32(1); |
2948 | let r = _mm512_maskz_shrdv_epi32(0, a, b, c); |
2949 | assert_eq_m512i(r, _mm512_setzero_si512()); |
2950 | let r = _mm512_maskz_shrdv_epi32(0b11111111_11111111, a, b, c); |
2951 | let e = _mm512_set1_epi32(1); |
2952 | assert_eq_m512i(r, e); |
2953 | } |
2954 | |
2955 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2956 | unsafe fn test_mm256_shrdv_epi32() { |
2957 | let a = _mm256_set1_epi32(2); |
2958 | let b = _mm256_set1_epi32(8); |
2959 | let c = _mm256_set1_epi32(1); |
2960 | let r = _mm256_shrdv_epi32(a, b, c); |
2961 | let e = _mm256_set1_epi32(1); |
2962 | assert_eq_m256i(r, e); |
2963 | } |
2964 | |
2965 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2966 | unsafe fn test_mm256_mask_shrdv_epi32() { |
2967 | let a = _mm256_set1_epi32(2); |
2968 | let b = _mm256_set1_epi32(8); |
2969 | let c = _mm256_set1_epi32(1); |
2970 | let r = _mm256_mask_shrdv_epi32(a, 0, b, c); |
2971 | assert_eq_m256i(r, a); |
2972 | let r = _mm256_mask_shrdv_epi32(a, 0b11111111, b, c); |
2973 | let e = _mm256_set1_epi32(1); |
2974 | assert_eq_m256i(r, e); |
2975 | } |
2976 | |
2977 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2978 | unsafe fn test_mm256_maskz_shrdv_epi32() { |
2979 | let a = _mm256_set1_epi32(2); |
2980 | let b = _mm256_set1_epi32(8); |
2981 | let c = _mm256_set1_epi32(1); |
2982 | let r = _mm256_maskz_shrdv_epi32(0, a, b, c); |
2983 | assert_eq_m256i(r, _mm256_setzero_si256()); |
2984 | let r = _mm256_maskz_shrdv_epi32(0b11111111, a, b, c); |
2985 | let e = _mm256_set1_epi32(1); |
2986 | assert_eq_m256i(r, e); |
2987 | } |
2988 | |
2989 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
2990 | unsafe fn test_mm_shrdv_epi32() { |
2991 | let a = _mm_set1_epi32(2); |
2992 | let b = _mm_set1_epi32(8); |
2993 | let c = _mm_set1_epi32(1); |
2994 | let r = _mm_shrdv_epi32(a, b, c); |
2995 | let e = _mm_set1_epi32(1); |
2996 | assert_eq_m128i(r, e); |
2997 | } |
2998 | |
2999 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3000 | unsafe fn test_mm_mask_shrdv_epi32() { |
3001 | let a = _mm_set1_epi32(2); |
3002 | let b = _mm_set1_epi32(8); |
3003 | let c = _mm_set1_epi32(1); |
3004 | let r = _mm_mask_shrdv_epi32(a, 0, b, c); |
3005 | assert_eq_m128i(r, a); |
3006 | let r = _mm_mask_shrdv_epi32(a, 0b00001111, b, c); |
3007 | let e = _mm_set1_epi32(1); |
3008 | assert_eq_m128i(r, e); |
3009 | } |
3010 | |
3011 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3012 | unsafe fn test_mm_maskz_shrdv_epi32() { |
3013 | let a = _mm_set1_epi32(2); |
3014 | let b = _mm_set1_epi32(8); |
3015 | let c = _mm_set1_epi32(1); |
3016 | let r = _mm_maskz_shrdv_epi32(0, a, b, c); |
3017 | assert_eq_m128i(r, _mm_setzero_si128()); |
3018 | let r = _mm_maskz_shrdv_epi32(0b00001111, a, b, c); |
3019 | let e = _mm_set1_epi32(1); |
3020 | assert_eq_m128i(r, e); |
3021 | } |
3022 | |
3023 | #[simd_test(enable = "avx512vbmi2" )] |
3024 | unsafe fn test_mm512_shrdv_epi16() { |
3025 | let a = _mm512_set1_epi16(2); |
3026 | let b = _mm512_set1_epi16(8); |
3027 | let c = _mm512_set1_epi16(1); |
3028 | let r = _mm512_shrdv_epi16(a, b, c); |
3029 | let e = _mm512_set1_epi16(1); |
3030 | assert_eq_m512i(r, e); |
3031 | } |
3032 | |
3033 | #[simd_test(enable = "avx512vbmi2" )] |
3034 | unsafe fn test_mm512_mask_shrdv_epi16() { |
3035 | let a = _mm512_set1_epi16(2); |
3036 | let b = _mm512_set1_epi16(8); |
3037 | let c = _mm512_set1_epi16(1); |
3038 | let r = _mm512_mask_shrdv_epi16(a, 0, b, c); |
3039 | assert_eq_m512i(r, a); |
3040 | let r = _mm512_mask_shrdv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c); |
3041 | let e = _mm512_set1_epi16(1); |
3042 | assert_eq_m512i(r, e); |
3043 | } |
3044 | |
3045 | #[simd_test(enable = "avx512vbmi2" )] |
3046 | unsafe fn test_mm512_maskz_shrdv_epi16() { |
3047 | let a = _mm512_set1_epi16(2); |
3048 | let b = _mm512_set1_epi16(8); |
3049 | let c = _mm512_set1_epi16(1); |
3050 | let r = _mm512_maskz_shrdv_epi16(0, a, b, c); |
3051 | assert_eq_m512i(r, _mm512_setzero_si512()); |
3052 | let r = _mm512_maskz_shrdv_epi16(0b11111111_11111111_11111111_11111111, a, b, c); |
3053 | let e = _mm512_set1_epi16(1); |
3054 | assert_eq_m512i(r, e); |
3055 | } |
3056 | |
3057 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3058 | unsafe fn test_mm256_shrdv_epi16() { |
3059 | let a = _mm256_set1_epi16(2); |
3060 | let b = _mm256_set1_epi16(8); |
3061 | let c = _mm256_set1_epi16(1); |
3062 | let r = _mm256_shrdv_epi16(a, b, c); |
3063 | let e = _mm256_set1_epi16(1); |
3064 | assert_eq_m256i(r, e); |
3065 | } |
3066 | |
3067 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3068 | unsafe fn test_mm256_mask_shrdv_epi16() { |
3069 | let a = _mm256_set1_epi16(2); |
3070 | let b = _mm256_set1_epi16(8); |
3071 | let c = _mm256_set1_epi16(1); |
3072 | let r = _mm256_mask_shrdv_epi16(a, 0, b, c); |
3073 | assert_eq_m256i(r, a); |
3074 | let r = _mm256_mask_shrdv_epi16(a, 0b11111111_11111111, b, c); |
3075 | let e = _mm256_set1_epi16(1); |
3076 | assert_eq_m256i(r, e); |
3077 | } |
3078 | |
3079 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3080 | unsafe fn test_mm256_maskz_shrdv_epi16() { |
3081 | let a = _mm256_set1_epi16(2); |
3082 | let b = _mm256_set1_epi16(8); |
3083 | let c = _mm256_set1_epi16(1); |
3084 | let r = _mm256_maskz_shrdv_epi16(0, a, b, c); |
3085 | assert_eq_m256i(r, _mm256_setzero_si256()); |
3086 | let r = _mm256_maskz_shrdv_epi16(0b11111111_11111111, a, b, c); |
3087 | let e = _mm256_set1_epi16(1); |
3088 | assert_eq_m256i(r, e); |
3089 | } |
3090 | |
3091 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3092 | unsafe fn test_mm_shrdv_epi16() { |
3093 | let a = _mm_set1_epi16(2); |
3094 | let b = _mm_set1_epi16(8); |
3095 | let c = _mm_set1_epi16(1); |
3096 | let r = _mm_shrdv_epi16(a, b, c); |
3097 | let e = _mm_set1_epi16(1); |
3098 | assert_eq_m128i(r, e); |
3099 | } |
3100 | |
3101 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3102 | unsafe fn test_mm_mask_shrdv_epi16() { |
3103 | let a = _mm_set1_epi16(2); |
3104 | let b = _mm_set1_epi16(8); |
3105 | let c = _mm_set1_epi16(1); |
3106 | let r = _mm_mask_shrdv_epi16(a, 0, b, c); |
3107 | assert_eq_m128i(r, a); |
3108 | let r = _mm_mask_shrdv_epi16(a, 0b11111111, b, c); |
3109 | let e = _mm_set1_epi16(1); |
3110 | assert_eq_m128i(r, e); |
3111 | } |
3112 | |
3113 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3114 | unsafe fn test_mm_maskz_shrdv_epi16() { |
3115 | let a = _mm_set1_epi16(2); |
3116 | let b = _mm_set1_epi16(8); |
3117 | let c = _mm_set1_epi16(1); |
3118 | let r = _mm_maskz_shrdv_epi16(0, a, b, c); |
3119 | assert_eq_m128i(r, _mm_setzero_si128()); |
3120 | let r = _mm_maskz_shrdv_epi16(0b11111111, a, b, c); |
3121 | let e = _mm_set1_epi16(1); |
3122 | assert_eq_m128i(r, e); |
3123 | } |
3124 | |
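// The shldi tests exercise the double shift left by an immediate (VPSHLD*): each
// destination lane is the high half of concat(a, b) shifted left by IMM8. With
// a = 1, b = 1 << (width - 1) and IMM8 = 2, that is (1 << 2) | (b >> (width - 2))
// = 4 | 2 = 6, hence the expected value 6 across the epi64/epi32/epi16 variants.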
3125 | #[simd_test(enable = "avx512vbmi2" )] |
3126 | unsafe fn test_mm512_shldi_epi64() { |
3127 | let a = _mm512_set1_epi64(1); |
3128 | let b = _mm512_set1_epi64(1 << 63); |
3129 | let r = _mm512_shldi_epi64::<2>(a, b); |
3130 | let e = _mm512_set1_epi64(6); |
3131 | assert_eq_m512i(r, e); |
3132 | } |
3133 | |
3134 | #[simd_test(enable = "avx512vbmi2" )] |
3135 | unsafe fn test_mm512_mask_shldi_epi64() { |
3136 | let a = _mm512_set1_epi64(1); |
3137 | let b = _mm512_set1_epi64(1 << 63); |
3138 | let r = _mm512_mask_shldi_epi64::<2>(a, 0, a, b); |
3139 | assert_eq_m512i(r, a); |
3140 | let r = _mm512_mask_shldi_epi64::<2>(a, 0b11111111, a, b); |
3141 | let e = _mm512_set1_epi64(6); |
3142 | assert_eq_m512i(r, e); |
3143 | } |
3144 | |
3145 | #[simd_test(enable = "avx512vbmi2" )] |
3146 | unsafe fn test_mm512_maskz_shldi_epi64() { |
3147 | let a = _mm512_set1_epi64(1); |
3148 | let b = _mm512_set1_epi64(1 << 63); |
3149 | let r = _mm512_maskz_shldi_epi64::<2>(0, a, b); |
3150 | assert_eq_m512i(r, _mm512_setzero_si512()); |
3151 | let r = _mm512_maskz_shldi_epi64::<2>(0b11111111, a, b); |
3152 | let e = _mm512_set1_epi64(6); |
3153 | assert_eq_m512i(r, e); |
3154 | } |
3155 | |
3156 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3157 | unsafe fn test_mm256_shldi_epi64() { |
3158 | let a = _mm256_set1_epi64x(1); |
3159 | let b = _mm256_set1_epi64x(1 << 63); |
3160 | let r = _mm256_shldi_epi64::<2>(a, b); |
3161 | let e = _mm256_set1_epi64x(6); |
3162 | assert_eq_m256i(r, e); |
3163 | } |
3164 | |
3165 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3166 | unsafe fn test_mm256_mask_shldi_epi64() { |
3167 | let a = _mm256_set1_epi64x(1); |
3168 | let b = _mm256_set1_epi64x(1 << 63); |
3169 | let r = _mm256_mask_shldi_epi64::<2>(a, 0, a, b); |
3170 | assert_eq_m256i(r, a); |
3171 | let r = _mm256_mask_shldi_epi64::<2>(a, 0b00001111, a, b); |
3172 | let e = _mm256_set1_epi64x(6); |
3173 | assert_eq_m256i(r, e); |
3174 | } |
3175 | |
3176 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3177 | unsafe fn test_mm256_maskz_shldi_epi64() { |
3178 | let a = _mm256_set1_epi64x(1); |
3179 | let b = _mm256_set1_epi64x(1 << 63); |
3180 | let r = _mm256_maskz_shldi_epi64::<2>(0, a, b); |
3181 | assert_eq_m256i(r, _mm256_setzero_si256()); |
3182 | let r = _mm256_maskz_shldi_epi64::<2>(0b00001111, a, b); |
3183 | let e = _mm256_set1_epi64x(6); |
3184 | assert_eq_m256i(r, e); |
3185 | } |
3186 | |
3187 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3188 | unsafe fn test_mm_shldi_epi64() { |
3189 | let a = _mm_set1_epi64x(1); |
3190 | let b = _mm_set1_epi64x(1 << 63); |
3191 | let r = _mm_shldi_epi64::<2>(a, b); |
3192 | let e = _mm_set1_epi64x(6); |
3193 | assert_eq_m128i(r, e); |
3194 | } |
3195 | |
3196 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3197 | unsafe fn test_mm_mask_shldi_epi64() { |
3198 | let a = _mm_set1_epi64x(1); |
3199 | let b = _mm_set1_epi64x(1 << 63); |
3200 | let r = _mm_mask_shldi_epi64::<2>(a, 0, a, b); |
3201 | assert_eq_m128i(r, a); |
3202 | let r = _mm_mask_shldi_epi64::<2>(a, 0b00000011, a, b); |
3203 | let e = _mm_set1_epi64x(6); |
3204 | assert_eq_m128i(r, e); |
3205 | } |
3206 | |
3207 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3208 | unsafe fn test_mm_maskz_shldi_epi64() { |
3209 | let a = _mm_set1_epi64x(1); |
3210 | let b = _mm_set1_epi64x(1 << 63); |
3211 | let r = _mm_maskz_shldi_epi64::<2>(0, a, b); |
3212 | assert_eq_m128i(r, _mm_setzero_si128()); |
3213 | let r = _mm_maskz_shldi_epi64::<2>(0b00000011, a, b); |
3214 | let e = _mm_set1_epi64x(6); |
3215 | assert_eq_m128i(r, e); |
3216 | } |
3217 | |
3218 | #[simd_test(enable = "avx512vbmi2" )] |
3219 | unsafe fn test_mm512_shldi_epi32() { |
3220 | let a = _mm512_set1_epi32(1); |
3221 | let b = _mm512_set1_epi32(1 << 31); |
3222 | let r = _mm512_shldi_epi32::<2>(a, b); |
3223 | let e = _mm512_set1_epi32(6); |
3224 | assert_eq_m512i(r, e); |
3225 | } |
3226 | |
3227 | #[simd_test(enable = "avx512vbmi2" )] |
3228 | unsafe fn test_mm512_mask_shldi_epi32() { |
3229 | let a = _mm512_set1_epi32(1); |
3230 | let b = _mm512_set1_epi32(1 << 31); |
3231 | let r = _mm512_mask_shldi_epi32::<2>(a, 0, a, b); |
3232 | assert_eq_m512i(r, a); |
3233 | let r = _mm512_mask_shldi_epi32::<2>(a, 0b11111111_11111111, a, b); |
3234 | let e = _mm512_set1_epi32(6); |
3235 | assert_eq_m512i(r, e); |
3236 | } |
3237 | |
3238 | #[simd_test(enable = "avx512vbmi2" )] |
3239 | unsafe fn test_mm512_maskz_shldi_epi32() { |
3240 | let a = _mm512_set1_epi32(1); |
3241 | let b = _mm512_set1_epi32(1 << 31); |
3242 | let r = _mm512_maskz_shldi_epi32::<2>(0, a, b); |
3243 | assert_eq_m512i(r, _mm512_setzero_si512()); |
3244 | let r = _mm512_maskz_shldi_epi32::<2>(0b11111111_11111111, a, b); |
3245 | let e = _mm512_set1_epi32(6); |
3246 | assert_eq_m512i(r, e); |
3247 | } |
3248 | |
3249 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3250 | unsafe fn test_mm256_shldi_epi32() { |
3251 | let a = _mm256_set1_epi32(1); |
3252 | let b = _mm256_set1_epi32(1 << 31); |
3253 | let r = _mm256_shldi_epi32::<2>(a, b); |
3254 | let e = _mm256_set1_epi32(6); |
3255 | assert_eq_m256i(r, e); |
3256 | } |
3257 | |
3258 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3259 | unsafe fn test_mm256_mask_shldi_epi32() { |
3260 | let a = _mm256_set1_epi32(1); |
3261 | let b = _mm256_set1_epi32(1 << 31); |
3262 | let r = _mm256_mask_shldi_epi32::<2>(a, 0, a, b); |
3263 | assert_eq_m256i(r, a); |
3264 | let r = _mm256_mask_shldi_epi32::<2>(a, 0b11111111, a, b); |
3265 | let e = _mm256_set1_epi32(6); |
3266 | assert_eq_m256i(r, e); |
3267 | } |
3268 | |
3269 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3270 | unsafe fn test_mm256_maskz_shldi_epi32() { |
3271 | let a = _mm256_set1_epi32(1); |
3272 | let b = _mm256_set1_epi32(1 << 31); |
3273 | let r = _mm256_maskz_shldi_epi32::<2>(0, a, b); |
3274 | assert_eq_m256i(r, _mm256_setzero_si256()); |
3275 | let r = _mm256_maskz_shldi_epi32::<2>(0b11111111, a, b); |
3276 | let e = _mm256_set1_epi32(6); |
3277 | assert_eq_m256i(r, e); |
3278 | } |
3279 | |
3280 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3281 | unsafe fn test_mm_shldi_epi32() { |
3282 | let a = _mm_set1_epi32(1); |
3283 | let b = _mm_set1_epi32(1 << 31); |
3284 | let r = _mm_shldi_epi32::<2>(a, b); |
3285 | let e = _mm_set1_epi32(6); |
3286 | assert_eq_m128i(r, e); |
3287 | } |
3288 | |
3289 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3290 | unsafe fn test_mm_mask_shldi_epi32() { |
3291 | let a = _mm_set1_epi32(1); |
3292 | let b = _mm_set1_epi32(1 << 31); |
3293 | let r = _mm_mask_shldi_epi32::<2>(a, 0, a, b); |
3294 | assert_eq_m128i(r, a); |
3295 | let r = _mm_mask_shldi_epi32::<2>(a, 0b00001111, a, b); |
3296 | let e = _mm_set1_epi32(6); |
3297 | assert_eq_m128i(r, e); |
3298 | } |
3299 | |
3300 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3301 | unsafe fn test_mm_maskz_shldi_epi32() { |
3302 | let a = _mm_set1_epi32(1); |
3303 | let b = _mm_set1_epi32(1 << 31); |
3304 | let r = _mm_maskz_shldi_epi32::<2>(0, a, b); |
3305 | assert_eq_m128i(r, _mm_setzero_si128()); |
3306 | let r = _mm_maskz_shldi_epi32::<2>(0b00001111, a, b); |
3307 | let e = _mm_set1_epi32(6); |
3308 | assert_eq_m128i(r, e); |
3309 | } |
3310 | |
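// Note: `1 << 15` evaluates to -32768 as an i16, but only the bit pattern (0x8000)
// matters here; shifting concat(1, 0x8000) left by 2 still yields 6 in each 16-bit lane.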
3311 | #[simd_test(enable = "avx512vbmi2" )] |
3312 | unsafe fn test_mm512_shldi_epi16() { |
3313 | let a = _mm512_set1_epi16(1); |
3314 | let b = _mm512_set1_epi16(1 << 15); |
3315 | let r = _mm512_shldi_epi16::<2>(a, b); |
3316 | let e = _mm512_set1_epi16(6); |
3317 | assert_eq_m512i(r, e); |
3318 | } |
3319 | |
3320 | #[simd_test(enable = "avx512vbmi2" )] |
3321 | unsafe fn test_mm512_mask_shldi_epi16() { |
3322 | let a = _mm512_set1_epi16(1); |
3323 | let b = _mm512_set1_epi16(1 << 15); |
3324 | let r = _mm512_mask_shldi_epi16::<2>(a, 0, a, b); |
3325 | assert_eq_m512i(r, a); |
3326 | let r = _mm512_mask_shldi_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a, b); |
3327 | let e = _mm512_set1_epi16(6); |
3328 | assert_eq_m512i(r, e); |
3329 | } |
3330 | |
3331 | #[simd_test(enable = "avx512vbmi2" )] |
3332 | unsafe fn test_mm512_maskz_shldi_epi16() { |
3333 | let a = _mm512_set1_epi16(1); |
3334 | let b = _mm512_set1_epi16(1 << 15); |
3335 | let r = _mm512_maskz_shldi_epi16::<2>(0, a, b); |
3336 | assert_eq_m512i(r, _mm512_setzero_si512()); |
3337 | let r = _mm512_maskz_shldi_epi16::<2>(0b11111111_11111111_11111111_11111111, a, b); |
3338 | let e = _mm512_set1_epi16(6); |
3339 | assert_eq_m512i(r, e); |
3340 | } |
3341 | |
3342 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3343 | unsafe fn test_mm256_shldi_epi16() { |
3344 | let a = _mm256_set1_epi16(1); |
3345 | let b = _mm256_set1_epi16(1 << 15); |
3346 | let r = _mm256_shldi_epi16::<2>(a, b); |
3347 | let e = _mm256_set1_epi16(6); |
3348 | assert_eq_m256i(r, e); |
3349 | } |
3350 | |
3351 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3352 | unsafe fn test_mm256_mask_shldi_epi16() { |
3353 | let a = _mm256_set1_epi16(1); |
3354 | let b = _mm256_set1_epi16(1 << 15); |
3355 | let r = _mm256_mask_shldi_epi16::<2>(a, 0, a, b); |
3356 | assert_eq_m256i(r, a); |
3357 | let r = _mm256_mask_shldi_epi16::<2>(a, 0b11111111_11111111, a, b); |
3358 | let e = _mm256_set1_epi16(6); |
3359 | assert_eq_m256i(r, e); |
3360 | } |
3361 | |
3362 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3363 | unsafe fn test_mm256_maskz_shldi_epi16() { |
3364 | let a = _mm256_set1_epi16(1); |
3365 | let b = _mm256_set1_epi16(1 << 15); |
3366 | let r = _mm256_maskz_shldi_epi16::<2>(0, a, b); |
3367 | assert_eq_m256i(r, _mm256_setzero_si256()); |
3368 | let r = _mm256_maskz_shldi_epi16::<2>(0b11111111_11111111, a, b); |
3369 | let e = _mm256_set1_epi16(6); |
3370 | assert_eq_m256i(r, e); |
3371 | } |
3372 | |
3373 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3374 | unsafe fn test_mm_shldi_epi16() { |
3375 | let a = _mm_set1_epi16(1); |
3376 | let b = _mm_set1_epi16(1 << 15); |
3377 | let r = _mm_shldi_epi16::<2>(a, b); |
3378 | let e = _mm_set1_epi16(6); |
3379 | assert_eq_m128i(r, e); |
3380 | } |
3381 | |
3382 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3383 | unsafe fn test_mm_mask_shldi_epi16() { |
3384 | let a = _mm_set1_epi16(1); |
3385 | let b = _mm_set1_epi16(1 << 15); |
3386 | let r = _mm_mask_shldi_epi16::<2>(a, 0, a, b); |
3387 | assert_eq_m128i(r, a); |
3388 | let r = _mm_mask_shldi_epi16::<2>(a, 0b11111111, a, b); |
3389 | let e = _mm_set1_epi16(6); |
3390 | assert_eq_m128i(r, e); |
3391 | } |
3392 | |
3393 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3394 | unsafe fn test_mm_maskz_shldi_epi16() { |
3395 | let a = _mm_set1_epi16(1); |
3396 | let b = _mm_set1_epi16(1 << 15); |
3397 | let r = _mm_maskz_shldi_epi16::<2>(0, a, b); |
3398 | assert_eq_m128i(r, _mm_setzero_si128()); |
3399 | let r = _mm_maskz_shldi_epi16::<2>(0b11111111, a, b); |
3400 | let e = _mm_set1_epi16(6); |
3401 | assert_eq_m128i(r, e); |
3402 | } |
3403 | |
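// The shrdi tests exercise the double shift right by an immediate (VPSHRD*): each
// destination lane is the low half of concat(b, a) shifted right by IMM8. With
// a = 2, b = 8 and IMM8 = 1, no bit of b reaches the low half, so every active
// lane is simply 2 >> 1 = 1.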
3404 | #[simd_test(enable = "avx512vbmi2" )] |
3405 | unsafe fn test_mm512_shrdi_epi64() { |
3406 | let a = _mm512_set1_epi64(2); |
3407 | let b = _mm512_set1_epi64(8); |
3408 | let r = _mm512_shrdi_epi64::<1>(a, b); |
3409 | let e = _mm512_set1_epi64(1); |
3410 | assert_eq_m512i(r, e); |
3411 | } |
3412 | |
3413 | #[simd_test(enable = "avx512vbmi2" )] |
3414 | unsafe fn test_mm512_mask_shrdi_epi64() { |
3415 | let a = _mm512_set1_epi64(2); |
3416 | let b = _mm512_set1_epi64(8); |
3417 | let r = _mm512_mask_shrdi_epi64::<1>(a, 0, a, b); |
3418 | assert_eq_m512i(r, a); |
3419 | let r = _mm512_mask_shrdi_epi64::<1>(a, 0b11111111, a, b); |
3420 | let e = _mm512_set1_epi64(1); |
3421 | assert_eq_m512i(r, e); |
3422 | } |
3423 | |
3424 | #[simd_test(enable = "avx512vbmi2" )] |
3425 | unsafe fn test_mm512_maskz_shrdi_epi64() { |
3426 | let a = _mm512_set1_epi64(2); |
3427 | let b = _mm512_set1_epi64(8); |
3428 | let r = _mm512_maskz_shrdi_epi64::<1>(0, a, b); |
3429 | assert_eq_m512i(r, _mm512_setzero_si512()); |
3430 | let r = _mm512_maskz_shrdi_epi64::<1>(0b11111111, a, b); |
3431 | let e = _mm512_set1_epi64(1); |
3432 | assert_eq_m512i(r, e); |
3433 | } |
3434 | |
3435 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3436 | unsafe fn test_mm256_shrdi_epi64() { |
3437 | let a = _mm256_set1_epi64x(2); |
3438 | let b = _mm256_set1_epi64x(8); |
3439 | let r = _mm256_shrdi_epi64::<1>(a, b); |
3440 | let e = _mm256_set1_epi64x(1); |
3441 | assert_eq_m256i(r, e); |
3442 | } |
3443 | |
3444 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3445 | unsafe fn test_mm256_mask_shrdi_epi64() { |
3446 | let a = _mm256_set1_epi64x(2); |
3447 | let b = _mm256_set1_epi64x(8); |
3448 | let r = _mm256_mask_shrdi_epi64::<1>(a, 0, a, b); |
3449 | assert_eq_m256i(r, a); |
3450 | let r = _mm256_mask_shrdi_epi64::<1>(a, 0b00001111, a, b); |
3451 | let e = _mm256_set1_epi64x(1); |
3452 | assert_eq_m256i(r, e); |
3453 | } |
3454 | |
3455 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3456 | unsafe fn test_mm256_maskz_shrdi_epi64() { |
3457 | let a = _mm256_set1_epi64x(2); |
3458 | let b = _mm256_set1_epi64x(8); |
3459 | let r = _mm256_maskz_shrdi_epi64::<1>(0, a, b); |
3460 | assert_eq_m256i(r, _mm256_setzero_si256()); |
3461 | let r = _mm256_maskz_shrdi_epi64::<1>(0b00001111, a, b); |
3462 | let e = _mm256_set1_epi64x(1); |
3463 | assert_eq_m256i(r, e); |
3464 | } |
3465 | |
3466 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3467 | unsafe fn test_mm_shrdi_epi64() { |
3468 | let a = _mm_set1_epi64x(2); |
3469 | let b = _mm_set1_epi64x(8); |
3470 | let r = _mm_shrdi_epi64::<1>(a, b); |
3471 | let e = _mm_set1_epi64x(1); |
3472 | assert_eq_m128i(r, e); |
3473 | } |
3474 | |
3475 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3476 | unsafe fn test_mm_mask_shrdi_epi64() { |
3477 | let a = _mm_set1_epi64x(2); |
3478 | let b = _mm_set1_epi64x(8); |
3479 | let r = _mm_mask_shrdi_epi64::<1>(a, 0, a, b); |
3480 | assert_eq_m128i(r, a); |
3481 | let r = _mm_mask_shrdi_epi64::<1>(a, 0b00000011, a, b); |
3482 | let e = _mm_set1_epi64x(1); |
3483 | assert_eq_m128i(r, e); |
3484 | } |
3485 | |
3486 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3487 | unsafe fn test_mm_maskz_shrdi_epi64() { |
3488 | let a = _mm_set1_epi64x(2); |
3489 | let b = _mm_set1_epi64x(8); |
3490 | let r = _mm_maskz_shrdi_epi64::<1>(0, a, b); |
3491 | assert_eq_m128i(r, _mm_setzero_si128()); |
3492 | let r = _mm_maskz_shrdi_epi64::<1>(0b00000011, a, b); |
3493 | let e = _mm_set1_epi64x(1); |
3494 | assert_eq_m128i(r, e); |
3495 | } |
3496 | |
3497 | #[simd_test(enable = "avx512vbmi2" )] |
3498 | unsafe fn test_mm512_shrdi_epi32() { |
3499 | let a = _mm512_set1_epi32(2); |
3500 | let b = _mm512_set1_epi32(8); |
3501 | let r = _mm512_shrdi_epi32::<1>(a, b); |
3502 | let e = _mm512_set1_epi32(1); |
3503 | assert_eq_m512i(r, e); |
3504 | } |
3505 | |
3506 | #[simd_test(enable = "avx512vbmi2" )] |
3507 | unsafe fn test_mm512_mask_shrdi_epi32() { |
3508 | let a = _mm512_set1_epi32(2); |
3509 | let b = _mm512_set1_epi32(8); |
3510 | let r = _mm512_mask_shrdi_epi32::<1>(a, 0, a, b); |
3511 | assert_eq_m512i(r, a); |
3512 | let r = _mm512_mask_shrdi_epi32::<1>(a, 0b11111111_11111111, a, b); |
3513 | let e = _mm512_set1_epi32(1); |
3514 | assert_eq_m512i(r, e); |
3515 | } |
3516 | |
3517 | #[simd_test(enable = "avx512vbmi2" )] |
3518 | unsafe fn test_mm512_maskz_shrdi_epi32() { |
3519 | let a = _mm512_set1_epi32(2); |
3520 | let b = _mm512_set1_epi32(8); |
3521 | let r = _mm512_maskz_shrdi_epi32::<1>(0, a, b); |
3522 | assert_eq_m512i(r, _mm512_setzero_si512()); |
3523 | let r = _mm512_maskz_shrdi_epi32::<1>(0b11111111_11111111, a, b); |
3524 | let e = _mm512_set1_epi32(1); |
3525 | assert_eq_m512i(r, e); |
3526 | } |
3527 | |
3528 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3529 | unsafe fn test_mm256_shrdi_epi32() { |
3530 | let a = _mm256_set1_epi32(2); |
3531 | let b = _mm256_set1_epi32(8); |
3532 | let r = _mm256_shrdi_epi32::<1>(a, b); |
3533 | let e = _mm256_set1_epi32(1); |
3534 | assert_eq_m256i(r, e); |
3535 | } |
3536 | |
3537 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3538 | unsafe fn test_mm256_mask_shrdi_epi32() { |
3539 | let a = _mm256_set1_epi32(2); |
3540 | let b = _mm256_set1_epi32(8); |
3541 | let r = _mm256_mask_shrdi_epi32::<1>(a, 0, a, b); |
3542 | assert_eq_m256i(r, a); |
3543 | let r = _mm256_mask_shrdi_epi32::<1>(a, 0b11111111, a, b); |
3544 | let e = _mm256_set1_epi32(1); |
3545 | assert_eq_m256i(r, e); |
3546 | } |
3547 | |
3548 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3549 | unsafe fn test_mm256_maskz_shrdi_epi32() { |
3550 | let a = _mm256_set1_epi32(2); |
3551 | let b = _mm256_set1_epi32(8); |
3552 | let r = _mm256_maskz_shrdi_epi32::<1>(0, a, b); |
3553 | assert_eq_m256i(r, _mm256_setzero_si256()); |
3554 | let r = _mm256_maskz_shrdi_epi32::<1>(0b11111111, a, b); |
3555 | let e = _mm256_set1_epi32(1); |
3556 | assert_eq_m256i(r, e); |
3557 | } |
3558 | |
3559 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3560 | unsafe fn test_mm_shrdi_epi32() { |
3561 | let a = _mm_set1_epi32(2); |
3562 | let b = _mm_set1_epi32(8); |
3563 | let r = _mm_shrdi_epi32::<1>(a, b); |
3564 | let e = _mm_set1_epi32(1); |
3565 | assert_eq_m128i(r, e); |
3566 | } |
3567 | |
3568 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3569 | unsafe fn test_mm_mask_shrdi_epi32() { |
3570 | let a = _mm_set1_epi32(2); |
3571 | let b = _mm_set1_epi32(8); |
3572 | let r = _mm_mask_shrdi_epi32::<1>(a, 0, a, b); |
3573 | assert_eq_m128i(r, a); |
3574 | let r = _mm_mask_shrdi_epi32::<1>(a, 0b00001111, a, b); |
3575 | let e = _mm_set1_epi32(1); |
3576 | assert_eq_m128i(r, e); |
3577 | } |
3578 | |
3579 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3580 | unsafe fn test_mm_maskz_shrdi_epi32() { |
3581 | let a = _mm_set1_epi32(2); |
3582 | let b = _mm_set1_epi32(8); |
3583 | let r = _mm_maskz_shrdi_epi32::<1>(0, a, b); |
3584 | assert_eq_m128i(r, _mm_setzero_si128()); |
3585 | let r = _mm_maskz_shrdi_epi32::<1>(0b00001111, a, b); |
3586 | let e = _mm_set1_epi32(1); |
3587 | assert_eq_m128i(r, e); |
3588 | } |
3589 | |
3590 | #[simd_test(enable = "avx512vbmi2" )] |
3591 | unsafe fn test_mm512_shrdi_epi16() { |
3592 | let a = _mm512_set1_epi16(2); |
3593 | let b = _mm512_set1_epi16(8); |
3594 | let r = _mm512_shrdi_epi16::<1>(a, b); |
3595 | let e = _mm512_set1_epi16(1); |
3596 | assert_eq_m512i(r, e); |
3597 | } |
3598 | |
3599 | #[simd_test(enable = "avx512vbmi2" )] |
3600 | unsafe fn test_mm512_mask_shrdi_epi16() { |
3601 | let a = _mm512_set1_epi16(2); |
3602 | let b = _mm512_set1_epi16(8); |
3603 | let r = _mm512_mask_shrdi_epi16::<1>(a, 0, a, b); |
3604 | assert_eq_m512i(r, a); |
3605 | let r = _mm512_mask_shrdi_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a, b); |
3606 | let e = _mm512_set1_epi16(1); |
3607 | assert_eq_m512i(r, e); |
3608 | } |
3609 | |
3610 | #[simd_test(enable = "avx512vbmi2" )] |
3611 | unsafe fn test_mm512_maskz_shrdi_epi16() { |
3612 | let a = _mm512_set1_epi16(2); |
3613 | let b = _mm512_set1_epi16(8); |
3614 | let r = _mm512_maskz_shrdi_epi16::<1>(0, a, b); |
3615 | assert_eq_m512i(r, _mm512_setzero_si512()); |
3616 | let r = _mm512_maskz_shrdi_epi16::<1>(0b11111111_11111111_11111111_11111111, a, b); |
3617 | let e = _mm512_set1_epi16(1); |
3618 | assert_eq_m512i(r, e); |
3619 | } |
3620 | |
3621 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3622 | unsafe fn test_mm256_shrdi_epi16() { |
3623 | let a = _mm256_set1_epi16(2); |
3624 | let b = _mm256_set1_epi16(8); |
3625 | let r = _mm256_shrdi_epi16::<1>(a, b); |
3626 | let e = _mm256_set1_epi16(1); |
3627 | assert_eq_m256i(r, e); |
3628 | } |
3629 | |
3630 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3631 | unsafe fn test_mm256_mask_shrdi_epi16() { |
3632 | let a = _mm256_set1_epi16(2); |
3633 | let b = _mm256_set1_epi16(8); |
3634 | let r = _mm256_mask_shrdi_epi16::<1>(a, 0, a, b); |
3635 | assert_eq_m256i(r, a); |
3636 | let r = _mm256_mask_shrdi_epi16::<1>(a, 0b11111111_11111111, a, b); |
3637 | let e = _mm256_set1_epi16(1); |
3638 | assert_eq_m256i(r, e); |
3639 | } |
3640 | |
3641 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3642 | unsafe fn test_mm256_maskz_shrdi_epi16() { |
3643 | let a = _mm256_set1_epi16(2); |
3644 | let b = _mm256_set1_epi16(8); |
3645 | let r = _mm256_maskz_shrdi_epi16::<1>(0, a, b); |
3646 | assert_eq_m256i(r, _mm256_setzero_si256()); |
3647 | let r = _mm256_maskz_shrdi_epi16::<1>(0b11111111_11111111, a, b); |
3648 | let e = _mm256_set1_epi16(1); |
3649 | assert_eq_m256i(r, e); |
3650 | } |
3651 | |
3652 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3653 | unsafe fn test_mm_shrdi_epi16() { |
3654 | let a = _mm_set1_epi16(2); |
3655 | let b = _mm_set1_epi16(8); |
3656 | let r = _mm_shrdi_epi16::<1>(a, b); |
3657 | let e = _mm_set1_epi16(1); |
3658 | assert_eq_m128i(r, e); |
3659 | } |
3660 | |
3661 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3662 | unsafe fn test_mm_mask_shrdi_epi16() { |
3663 | let a = _mm_set1_epi16(2); |
3664 | let b = _mm_set1_epi16(8); |
3665 | let r = _mm_mask_shrdi_epi16::<1>(a, 0, a, b); |
3666 | assert_eq_m128i(r, a); |
3667 | let r = _mm_mask_shrdi_epi16::<1>(a, 0b11111111, a, b); |
3668 | let e = _mm_set1_epi16(1); |
3669 | assert_eq_m128i(r, e); |
3670 | } |
3671 | |
3672 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3673 | unsafe fn test_mm_maskz_shrdi_epi16() { |
3674 | let a = _mm_set1_epi16(2); |
3675 | let b = _mm_set1_epi16(8); |
3676 | let r = _mm_maskz_shrdi_epi16::<1>(0, a, b); |
3677 | assert_eq_m128i(r, _mm_setzero_si128()); |
3678 | let r = _mm_maskz_shrdi_epi16::<1>(0b11111111, a, b); |
3679 | let e = _mm_set1_epi16(1); |
3680 | assert_eq_m128i(r, e); |
3681 | } |
3682 | |
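// The expandloadu tests read consecutive elements from memory and place them, in
// order, into the destination lanes whose mask bits are set (bit i selects lane i);
// lanes with a clear mask bit keep src (mask variant) or are zeroed (maskz variant).
// black_box hides the pointer from the optimizer so the masked load is actually emitted.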
3683 | #[simd_test(enable = "avx512vbmi2" )] |
3684 | unsafe fn test_mm512_mask_expandloadu_epi16() { |
3685 | let src = _mm512_set1_epi16(42); |
3686 | let a = &[ |
3687 | 1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
3688 | 24, 25, 26, 27, 28, 29, 30, 31, 32, |
3689 | ]; |
3690 | let p = a.as_ptr(); |
3691 | let m = 0b11101000_11001010_11110000_00001111; |
3692 | let r = _mm512_mask_expandloadu_epi16(src, m, black_box(p)); |
3693 | let e = _mm512_set_epi16( |
3694 | 16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42, |
3695 | 42, 42, 42, 42, 42, 4, 3, 2, 1, |
3696 | ); |
3697 | assert_eq_m512i(r, e); |
3698 | } |
3699 | |
3700 | #[simd_test(enable = "avx512vbmi2" )] |
3701 | unsafe fn test_mm512_maskz_expandloadu_epi16() { |
3702 | let a = &[ |
3703 | 1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
3704 | 24, 25, 26, 27, 28, 29, 30, 31, 32, |
3705 | ]; |
3706 | let p = a.as_ptr(); |
3707 | let m = 0b11101000_11001010_11110000_00001111; |
3708 | let r = _mm512_maskz_expandloadu_epi16(m, black_box(p)); |
3709 | let e = _mm512_set_epi16( |
3710 | 16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0, |
3711 | 0, 4, 3, 2, 1, |
3712 | ); |
3713 | assert_eq_m512i(r, e); |
3714 | } |
3715 | |
3716 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3717 | unsafe fn test_mm256_mask_expandloadu_epi16() { |
3718 | let src = _mm256_set1_epi16(42); |
3719 | let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; |
3720 | let p = a.as_ptr(); |
3721 | let m = 0b11101000_11001010; |
3722 | let r = _mm256_mask_expandloadu_epi16(src, m, black_box(p)); |
3723 | let e = _mm256_set_epi16(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42); |
3724 | assert_eq_m256i(r, e); |
3725 | } |
3726 | |
3727 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3728 | unsafe fn test_mm256_maskz_expandloadu_epi16() { |
3729 | let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; |
3730 | let p = a.as_ptr(); |
3731 | let m = 0b11101000_11001010; |
3732 | let r = _mm256_maskz_expandloadu_epi16(m, black_box(p)); |
3733 | let e = _mm256_set_epi16(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0); |
3734 | assert_eq_m256i(r, e); |
3735 | } |
3736 | |
3737 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3738 | unsafe fn test_mm_mask_expandloadu_epi16() { |
3739 | let src = _mm_set1_epi16(42); |
3740 | let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8]; |
3741 | let p = a.as_ptr(); |
3742 | let m = 0b11101000; |
3743 | let r = _mm_mask_expandloadu_epi16(src, m, black_box(p)); |
3744 | let e = _mm_set_epi16(4, 3, 2, 42, 1, 42, 42, 42); |
3745 | assert_eq_m128i(r, e); |
3746 | } |
3747 | |
3748 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3749 | unsafe fn test_mm_maskz_expandloadu_epi16() { |
3750 | let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8]; |
3751 | let p = a.as_ptr(); |
3752 | let m = 0b11101000; |
3753 | let r = _mm_maskz_expandloadu_epi16(m, black_box(p)); |
3754 | let e = _mm_set_epi16(4, 3, 2, 0, 1, 0, 0, 0); |
3755 | assert_eq_m128i(r, e); |
3756 | } |
3757 | |
3758 | #[simd_test(enable = "avx512vbmi2" )] |
3759 | unsafe fn test_mm512_mask_expandloadu_epi8() { |
3760 | let src = _mm512_set1_epi8(42); |
3761 | let a = &[ |
3762 | 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
3763 | 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, |
3764 | 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, |
3765 | ]; |
3766 | let p = a.as_ptr(); |
3767 | let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101; |
3768 | let r = _mm512_mask_expandloadu_epi8(src, m, black_box(p)); |
3769 | let e = _mm512_set_epi8( |
3770 | 32, 31, 30, 42, 29, 42, 42, 42, 28, 27, 42, 42, 26, 42, 25, 42, 24, 23, 22, 21, 42, 42, |
3771 | 42, 42, 42, 42, 42, 42, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 42, 42, 42, 42, |
3772 | 42, 42, 42, 42, 8, 42, 7, 42, 6, 42, 5, 42, 42, 4, 42, 3, 42, 2, 42, 1, |
3773 | ); |
3774 | assert_eq_m512i(r, e); |
3775 | } |
3776 | |
3777 | #[simd_test(enable = "avx512vbmi2" )] |
3778 | unsafe fn test_mm512_maskz_expandloadu_epi8() { |
3779 | let a = &[ |
3780 | 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
3781 | 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, |
3782 | 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, |
3783 | ]; |
3784 | let p = a.as_ptr(); |
3785 | let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101; |
3786 | let r = _mm512_maskz_expandloadu_epi8(m, black_box(p)); |
3787 | let e = _mm512_set_epi8( |
3788 | 32, 31, 30, 0, 29, 0, 0, 0, 28, 27, 0, 0, 26, 0, 25, 0, 24, 23, 22, 21, 0, 0, 0, 0, 0, |
3789 | 0, 0, 0, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, |
3790 | 7, 0, 6, 0, 5, 0, 0, 4, 0, 3, 0, 2, 0, 1, |
3791 | ); |
3792 | assert_eq_m512i(r, e); |
3793 | } |
3794 | |
3795 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3796 | unsafe fn test_mm256_mask_expandloadu_epi8() { |
3797 | let src = _mm256_set1_epi8(42); |
3798 | let a = &[ |
3799 | 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
3800 | 24, 25, 26, 27, 28, 29, 30, 31, 32, |
3801 | ]; |
3802 | let p = a.as_ptr(); |
3803 | let m = 0b11101000_11001010_11110000_00001111; |
3804 | let r = _mm256_mask_expandloadu_epi8(src, m, black_box(p)); |
3805 | let e = _mm256_set_epi8( |
3806 | 16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42, |
3807 | 42, 42, 42, 42, 42, 4, 3, 2, 1, |
3808 | ); |
3809 | assert_eq_m256i(r, e); |
3810 | } |
3811 | |
3812 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3813 | unsafe fn test_mm256_maskz_expandloadu_epi8() { |
3814 | let a = &[ |
3815 | 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
3816 | 24, 25, 26, 27, 28, 29, 30, 31, 32, |
3817 | ]; |
3818 | let p = a.as_ptr(); |
3819 | let m = 0b11101000_11001010_11110000_00001111; |
3820 | let r = _mm256_maskz_expandloadu_epi8(m, black_box(p)); |
3821 | let e = _mm256_set_epi8( |
3822 | 16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0, |
3823 | 0, 4, 3, 2, 1, |
3824 | ); |
3825 | assert_eq_m256i(r, e); |
3826 | } |
3827 | |
3828 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3829 | unsafe fn test_mm_mask_expandloadu_epi8() { |
3830 | let src = _mm_set1_epi8(42); |
3831 | let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; |
3832 | let p = a.as_ptr(); |
3833 | let m = 0b11101000_11001010; |
3834 | let r = _mm_mask_expandloadu_epi8(src, m, black_box(p)); |
3835 | let e = _mm_set_epi8(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42); |
3836 | assert_eq_m128i(r, e); |
3837 | } |
3838 | |
3839 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3840 | unsafe fn test_mm_maskz_expandloadu_epi8() { |
3841 | let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; |
3842 | let p = a.as_ptr(); |
3843 | let m = 0b11101000_11001010; |
3844 | let r = _mm_maskz_expandloadu_epi8(m, black_box(p)); |
3845 | let e = _mm_set_epi8(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0); |
3846 | assert_eq_m128i(r, e); |
3847 | } |
3848 | |
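// The compressstoreu tests do the inverse of expand: the lanes selected by the mask
// are packed together and stored contiguously at the destination pointer, in lane
// order; the remaining elements of the output buffer are left untouched (here they
// stay zero from the array initializer).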
3849 | #[simd_test(enable = "avx512vbmi2" )] |
3850 | unsafe fn test_mm512_mask_compressstoreu_epi16() { |
3851 | let a = _mm512_set_epi16( |
3852 | 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, |
3853 | 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, |
3854 | ); |
3855 | let mut r = [0_i16; 32]; |
3856 | _mm512_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a); |
3857 | assert_eq!(&r, &[0_i16; 32]); |
3858 | _mm512_mask_compressstoreu_epi16( |
3859 | r.as_mut_ptr() as *mut _, |
3860 | 0b11110000_11001010_11111111_00000000, |
3861 | a, |
3862 | ); |
3863 | assert_eq!( |
3864 | &r, |
3865 | &[ |
3866 | 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, |
3867 | 0, 0, 0, 0, 0, 0, 0, 0, 0 |
3868 | ] |
3869 | ); |
3870 | } |
3871 | |
3872 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3873 | unsafe fn test_mm256_mask_compressstoreu_epi16() { |
3874 | let a = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); |
3875 | let mut r = [0_i16; 16]; |
3876 | _mm256_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a); |
3877 | assert_eq!(&r, &[0_i16; 16]); |
3878 | _mm256_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0b11110000_11001010, a); |
3879 | assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]); |
3880 | } |
3881 | |
3882 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3883 | unsafe fn test_mm_mask_compressstoreu_epi16() { |
3884 | let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); |
3885 | let mut r = [0_i16; 8]; |
3886 | _mm_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a); |
3887 | assert_eq!(&r, &[0_i16; 8]); |
3888 | _mm_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0b11110000, a); |
3889 | assert_eq!(&r, &[5, 6, 7, 8, 0, 0, 0, 0]); |
3890 | } |
3891 | |
3892 | #[simd_test(enable = "avx512vbmi2" )] |
3893 | unsafe fn test_mm512_mask_compressstoreu_epi8() { |
3894 | let a = _mm512_set_epi8( |
3895 | 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, |
3896 | 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, |
3897 | 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, |
3898 | ); |
3899 | let mut r = [0_i8; 64]; |
3900 | _mm512_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a); |
3901 | assert_eq!(&r, &[0_i8; 64]); |
3902 | _mm512_mask_compressstoreu_epi8( |
3903 | r.as_mut_ptr() as *mut _, |
3904 | 0b11110000_11001010_11111111_00000000_10101010_01010101_11110000_00001111, |
3905 | a, |
3906 | ); |
3907 | assert_eq!( |
3908 | &r, |
3909 | &[ |
3910 | 1, 2, 3, 4, 13, 14, 15, 16, 17, 19, 21, 23, 26, 28, 30, 32, 41, 42, 43, 44, 45, 46, |
3911 | 47, 48, 50, 52, 55, 56, 61, 62, 63, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
3912 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
3913 | ] |
3914 | ); |
3915 | } |
3916 | |
3917 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3918 | unsafe fn test_mm256_mask_compressstoreu_epi8() { |
3919 | let a = _mm256_set_epi8( |
3920 | 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, |
3921 | 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, |
3922 | ); |
3923 | let mut r = [0_i8; 32]; |
3924 | _mm256_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a); |
3925 | assert_eq!(&r, &[0_i8; 32]); |
3926 | _mm256_mask_compressstoreu_epi8( |
3927 | r.as_mut_ptr() as *mut _, |
3928 | 0b11110000_11001010_11111111_00000000, |
3929 | a, |
3930 | ); |
3931 | assert_eq!( |
3932 | &r, |
3933 | &[ |
3934 | 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, |
3935 | 0, 0, 0, 0, 0, 0, 0, 0, 0 |
3936 | ] |
3937 | ); |
3938 | } |
3939 | |
3940 | #[simd_test(enable = "avx512vbmi2,avx512vl" )] |
3941 | unsafe fn test_mm_mask_compressstoreu_epi8() { |
3942 | let a = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); |
3943 | let mut r = [0_i8; 16]; |
3944 | _mm_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a); |
3945 | assert_eq!(&r, &[0_i8; 16]); |
3946 | _mm_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0b11110000_11001010, a); |
3947 | assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]); |
3948 | } |
3949 | } |
3950 | |