1use crate::{
2 arch::asm,
3 core_arch::{simd::*, x86::*},
4 intrinsics::simd::*,
5};
6
7#[cfg(test)]
8use stdarch_test::assert_instr;
9
10/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11///
12/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi16)
13#[inline]
14#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2")]
15#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16pub unsafe fn _mm512_mask_expandloadu_epi16(
17 src: __m512i,
18 k: __mmask32,
19 mem_addr: *const i16,
20) -> __m512i {
21 let mut dst: __m512i = src;
22 asm!(
23 vpl!("vpexpandw {dst}{{{k}}}"),
24 p = in(reg) mem_addr,
25 k = in(kreg) k,
26 dst = inout(zmm_reg) dst,
27 options(pure, readonly, nostack)
28 );
29 dst
30}
31
32/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
33///
34/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi16)
35#[inline]
36#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2")]
37#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38pub unsafe fn _mm512_maskz_expandloadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
39 let mut dst: __m512i;
40 asm!(
41 vpl!("vpexpandw {dst}{{{k}}} {{z}}"),
42 p = in(reg) mem_addr,
43 k = in(kreg) k,
44 dst = out(zmm_reg) dst,
45 options(pure, readonly, nostack)
46 );
47 dst
48}
49
50/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
51///
52/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi16)
53#[inline]
54#[target_feature(enable = "avx512f,avx512vbmi2,avx512vl,avx")]
55#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
56pub unsafe fn _mm256_mask_expandloadu_epi16(
57 src: __m256i,
58 k: __mmask16,
59 mem_addr: *const i16,
60) -> __m256i {
61 let mut dst: __m256i = src;
62 asm!(
63 vpl!("vpexpandw {dst}{{{k}}}"),
64 p = in(reg) mem_addr,
65 k = in(kreg) k,
66 dst = inout(ymm_reg) dst,
67 options(pure, readonly, nostack)
68 );
69 dst
70}
71
72/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
73///
74/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi16)
75#[inline]
76#[target_feature(enable = "avx512f,avx512vbmi2,avx512vl,avx")]
77#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
78pub unsafe fn _mm256_maskz_expandloadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
79 let mut dst: __m256i;
80 asm!(
81 vpl!("vpexpandw {dst}{{{k}}} {{z}}"),
82 p = in(reg) mem_addr,
83 k = in(kreg) k,
84 dst = out(ymm_reg) dst,
85 options(pure, readonly, nostack)
86 );
87 dst
88}
89
90/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
91///
92/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi16)
93#[inline]
94#[target_feature(enable = "avx512f,avx512vbmi2,avx512vl,avx,sse")]
95#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
96pub unsafe fn _mm_mask_expandloadu_epi16(
97 src: __m128i,
98 k: __mmask8,
99 mem_addr: *const i16,
100) -> __m128i {
101 let mut dst: __m128i = src;
102 asm!(
103 vpl!("vpexpandw {dst}{{{k}}}"),
104 p = in(reg) mem_addr,
105 k = in(kreg) k,
106 dst = inout(xmm_reg) dst,
107 options(pure, readonly, nostack)
108 );
109 dst
110}
111
112/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
113///
114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi16)
115#[inline]
116#[target_feature(enable = "avx512f,avx512vbmi2,avx512vl,avx,sse")]
117#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
118pub unsafe fn _mm_maskz_expandloadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
119 let mut dst: __m128i;
120 asm!(
121 vpl!("vpexpandw {dst}{{{k}}} {{z}}"),
122 p = in(reg) mem_addr,
123 k = in(kreg) k,
124 dst = out(xmm_reg) dst,
125 options(pure, readonly, nostack)
126 );
127 dst
128}
129
130/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
131///
132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi8)
133#[inline]
134#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2")]
135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
136pub unsafe fn _mm512_mask_expandloadu_epi8(
137 src: __m512i,
138 k: __mmask64,
139 mem_addr: *const i8,
140) -> __m512i {
141 let mut dst: __m512i = src;
142 asm!(
143 vpl!("vpexpandb {dst}{{{k}}}"),
144 p = in(reg) mem_addr,
145 k = in(kreg) k,
146 dst = inout(zmm_reg) dst,
147 options(pure, readonly, nostack)
148 );
149 dst
150}
151
152/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
153///
154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi8)
155#[inline]
156#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2")]
157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
158pub unsafe fn _mm512_maskz_expandloadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
159 let mut dst: __m512i;
160 asm!(
161 vpl!("vpexpandb {dst}{{{k}}} {{z}}"),
162 p = in(reg) mem_addr,
163 k = in(kreg) k,
164 dst = out(zmm_reg) dst,
165 options(pure, readonly, nostack)
166 );
167 dst
168}
169
170/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
171///
172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi8)
173#[inline]
174#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2,avx512vl,avx")]
175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
176pub unsafe fn _mm256_mask_expandloadu_epi8(
177 src: __m256i,
178 k: __mmask32,
179 mem_addr: *const i8,
180) -> __m256i {
181 let mut dst: __m256i = src;
182 asm!(
183 vpl!("vpexpandb {dst}{{{k}}}"),
184 p = in(reg) mem_addr,
185 k = in(kreg) k,
186 dst = inout(ymm_reg) dst,
187 options(pure, readonly, nostack)
188 );
189 dst
190}
191
192/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
193///
194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi8)
195#[inline]
196#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2,avx512vl,avx")]
197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
198pub unsafe fn _mm256_maskz_expandloadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
199 let mut dst: __m256i;
200 asm!(
201 vpl!("vpexpandb {dst}{{{k}}} {{z}}"),
202 p = in(reg) mem_addr,
203 k = in(kreg) k,
204 dst = out(ymm_reg) dst,
205 options(pure, readonly, nostack)
206 );
207 dst
208}
209
210/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
211///
212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi8)
213#[inline]
214#[target_feature(enable = "avx512f,avx512vbmi2,avx512vl,avx,sse")]
215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
216pub unsafe fn _mm_mask_expandloadu_epi8(
217 src: __m128i,
218 k: __mmask16,
219 mem_addr: *const i8,
220) -> __m128i {
221 let mut dst: __m128i = src;
222 asm!(
223 vpl!("vpexpandb {dst}{{{k}}}"),
224 p = in(reg) mem_addr,
225 k = in(kreg) k,
226 dst = inout(xmm_reg) dst,
227 options(pure, readonly, nostack)
228 );
229 dst
230}
231
232/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
233///
234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi8)
235#[inline]
236#[target_feature(enable = "avx512f,avx512vbmi2,avx512vl,avx,sse")]
237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
238pub unsafe fn _mm_maskz_expandloadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
239 let mut dst: __m128i;
240 asm!(
241 vpl!("vpexpandb {dst}{{{k}}} {{z}}"),
242 p = in(reg) mem_addr,
243 k = in(kreg) k,
244 dst = out(xmm_reg) dst,
245 options(pure, readonly, nostack)
246 );
247 dst
248}
249
250/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
251///
252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi16)
253#[inline]
254#[target_feature(enable = "avx512vbmi2")]
255#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
256#[cfg_attr(test, assert_instr(vpcompressw))]
257pub unsafe fn _mm512_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask32, a: __m512i) {
258 vcompressstorew(mem:base_addr as *mut _, data:a.as_i16x32(), mask:k)
259}
260
261/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
262///
263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi16)
264#[inline]
265#[target_feature(enable = "avx512vbmi2,avx512vl")]
266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
267#[cfg_attr(test, assert_instr(vpcompressw))]
268pub unsafe fn _mm256_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask16, a: __m256i) {
269 vcompressstorew256(mem:base_addr as *mut _, data:a.as_i16x16(), mask:k)
270}
271
272/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
273///
274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi16)
275#[inline]
276#[target_feature(enable = "avx512vbmi2,avx512vl")]
277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
278#[cfg_attr(test, assert_instr(vpcompressw))]
279pub unsafe fn _mm_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask8, a: __m128i) {
280 vcompressstorew128(mem:base_addr as *mut _, data:a.as_i16x8(), mask:k)
281}
282
283/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
284///
285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi8)
286#[inline]
287#[target_feature(enable = "avx512vbmi2")]
288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
289#[cfg_attr(test, assert_instr(vpcompressb))]
290pub unsafe fn _mm512_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask64, a: __m512i) {
291 vcompressstoreb(mem:base_addr as *mut _, data:a.as_i8x64(), mask:k)
292}
293
294/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
295///
296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi8)
297#[inline]
298#[target_feature(enable = "avx512vbmi2,avx512vl")]
299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
300#[cfg_attr(test, assert_instr(vpcompressb))]
301pub unsafe fn _mm256_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask32, a: __m256i) {
302 vcompressstoreb256(mem:base_addr as *mut _, data:a.as_i8x32(), mask:k)
303}
304
305/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
306///
307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi8)
308#[inline]
309#[target_feature(enable = "avx512vbmi2,avx512vl")]
310#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
311#[cfg_attr(test, assert_instr(vpcompressb))]
312pub unsafe fn _mm_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask16, a: __m128i) {
313 vcompressstoreb128(mem:base_addr as *mut _, data:a.as_i8x16(), mask:k)
314}
315
316/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
317///
318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi16&expand=1192)
319#[inline]
320#[target_feature(enable = "avx512vbmi2")]
321#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
322#[cfg_attr(test, assert_instr(vpcompressw))]
323pub unsafe fn _mm512_mask_compress_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
324 transmute(src:vpcompressw(a:a.as_i16x32(), src:src.as_i16x32(), mask:k))
325}
326
327/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
328///
329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi16&expand=1193)
330#[inline]
331#[target_feature(enable = "avx512vbmi2")]
332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
333#[cfg_attr(test, assert_instr(vpcompressw))]
334pub unsafe fn _mm512_maskz_compress_epi16(k: __mmask32, a: __m512i) -> __m512i {
335 transmute(src:vpcompressw(
336 a:a.as_i16x32(),
337 src:_mm512_setzero_si512().as_i16x32(),
338 mask:k,
339 ))
340}
341
342/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
343///
344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi16&expand=1190)
345#[inline]
346#[target_feature(enable = "avx512vbmi2,avx512vl")]
347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
348#[cfg_attr(test, assert_instr(vpcompressw))]
349pub unsafe fn _mm256_mask_compress_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
350 transmute(src:vpcompressw256(a:a.as_i16x16(), src:src.as_i16x16(), mask:k))
351}
352
353/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
354///
355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi16&expand=1191)
356#[inline]
357#[target_feature(enable = "avx512vbmi2,avx512vl")]
358#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
359#[cfg_attr(test, assert_instr(vpcompressw))]
360pub unsafe fn _mm256_maskz_compress_epi16(k: __mmask16, a: __m256i) -> __m256i {
361 transmute(src:vpcompressw256(
362 a:a.as_i16x16(),
363 src:_mm256_setzero_si256().as_i16x16(),
364 mask:k,
365 ))
366}
367
368/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
369///
370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi16&expand=1188)
371#[inline]
372#[target_feature(enable = "avx512vbmi2,avx512vl")]
373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
374#[cfg_attr(test, assert_instr(vpcompressw))]
375pub unsafe fn _mm_mask_compress_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
376 transmute(src:vpcompressw128(a:a.as_i16x8(), src:src.as_i16x8(), mask:k))
377}
378
379/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
380///
381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi16&expand=1189)
382#[inline]
383#[target_feature(enable = "avx512vbmi2,avx512vl")]
384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
385#[cfg_attr(test, assert_instr(vpcompressw))]
386pub unsafe fn _mm_maskz_compress_epi16(k: __mmask8, a: __m128i) -> __m128i {
387 transmute(src:vpcompressw128(
388 a:a.as_i16x8(),
389 src:_mm_setzero_si128().as_i16x8(),
390 mask:k,
391 ))
392}
393
394/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
395///
396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi8&expand=1210)
397#[inline]
398#[target_feature(enable = "avx512vbmi2")]
399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
400#[cfg_attr(test, assert_instr(vpcompressb))]
401pub unsafe fn _mm512_mask_compress_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
402 transmute(src:vpcompressb(a:a.as_i8x64(), src:src.as_i8x64(), mask:k))
403}
404
405/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
406///
407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi8&expand=1211)
408#[inline]
409#[target_feature(enable = "avx512vbmi2")]
410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
411#[cfg_attr(test, assert_instr(vpcompressb))]
412pub unsafe fn _mm512_maskz_compress_epi8(k: __mmask64, a: __m512i) -> __m512i {
413 transmute(src:vpcompressb(
414 a:a.as_i8x64(),
415 src:_mm512_setzero_si512().as_i8x64(),
416 mask:k,
417 ))
418}
419
420/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
421///
422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi8&expand=1208)
423#[inline]
424#[target_feature(enable = "avx512vbmi2,avx512vl")]
425#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
426#[cfg_attr(test, assert_instr(vpcompressb))]
427pub unsafe fn _mm256_mask_compress_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
428 transmute(src:vpcompressb256(a:a.as_i8x32(), src:src.as_i8x32(), mask:k))
429}
430
431/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
432///
433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi8&expand=1209)
434#[inline]
435#[target_feature(enable = "avx512vbmi2,avx512vl")]
436#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
437#[cfg_attr(test, assert_instr(vpcompressb))]
438pub unsafe fn _mm256_maskz_compress_epi8(k: __mmask32, a: __m256i) -> __m256i {
439 transmute(src:vpcompressb256(
440 a:a.as_i8x32(),
441 src:_mm256_setzero_si256().as_i8x32(),
442 mask:k,
443 ))
444}
445
446/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
447///
448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi8&expand=1206)
449#[inline]
450#[target_feature(enable = "avx512vbmi2,avx512vl")]
451#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
452#[cfg_attr(test, assert_instr(vpcompressb))]
453pub unsafe fn _mm_mask_compress_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
454 transmute(src:vpcompressb128(a:a.as_i8x16(), src:src.as_i8x16(), mask:k))
455}
456
457/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
458///
459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi8&expand=1207)
460#[inline]
461#[target_feature(enable = "avx512vbmi2,avx512vl")]
462#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
463#[cfg_attr(test, assert_instr(vpcompressb))]
464pub unsafe fn _mm_maskz_compress_epi8(k: __mmask16, a: __m128i) -> __m128i {
465 transmute(src:vpcompressb128(
466 a:a.as_i8x16(),
467 src:_mm_setzero_si128().as_i8x16(),
468 mask:k,
469 ))
470}
471
472/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
473///
474/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi16&expand=2310)
475#[inline]
476#[target_feature(enable = "avx512vbmi2")]
477#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
478#[cfg_attr(test, assert_instr(vpexpandw))]
479pub unsafe fn _mm512_mask_expand_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
480 transmute(src:vpexpandw(a:a.as_i16x32(), src:src.as_i16x32(), mask:k))
481}
482
483/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
484///
485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi16&expand=2311)
486#[inline]
487#[target_feature(enable = "avx512vbmi2")]
488#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
489#[cfg_attr(test, assert_instr(vpexpandw))]
490pub unsafe fn _mm512_maskz_expand_epi16(k: __mmask32, a: __m512i) -> __m512i {
491 transmute(src:vpexpandw(
492 a:a.as_i16x32(),
493 src:_mm512_setzero_si512().as_i16x32(),
494 mask:k,
495 ))
496}
497
498/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
499///
500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi16&expand=2308)
501#[inline]
502#[target_feature(enable = "avx512vbmi2,avx512vl")]
503#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
504#[cfg_attr(test, assert_instr(vpexpandw))]
505pub unsafe fn _mm256_mask_expand_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
506 transmute(src:vpexpandw256(a:a.as_i16x16(), src:src.as_i16x16(), mask:k))
507}
508
509/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
510///
511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi16&expand=2309)
512#[inline]
513#[target_feature(enable = "avx512vbmi2,avx512vl")]
514#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
515#[cfg_attr(test, assert_instr(vpexpandw))]
516pub unsafe fn _mm256_maskz_expand_epi16(k: __mmask16, a: __m256i) -> __m256i {
517 transmute(src:vpexpandw256(
518 a:a.as_i16x16(),
519 src:_mm256_setzero_si256().as_i16x16(),
520 mask:k,
521 ))
522}
523
524/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
525///
526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi16&expand=2306)
527#[inline]
528#[target_feature(enable = "avx512vbmi2,avx512vl")]
529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
530#[cfg_attr(test, assert_instr(vpexpandw))]
531pub unsafe fn _mm_mask_expand_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
532 transmute(src:vpexpandw128(a:a.as_i16x8(), src:src.as_i16x8(), mask:k))
533}
534
535/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
536///
537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi16&expand=2307)
538#[inline]
539#[target_feature(enable = "avx512vbmi2,avx512vl")]
540#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
541#[cfg_attr(test, assert_instr(vpexpandw))]
542pub unsafe fn _mm_maskz_expand_epi16(k: __mmask8, a: __m128i) -> __m128i {
543 transmute(src:vpexpandw128(
544 a:a.as_i16x8(),
545 src:_mm_setzero_si128().as_i16x8(),
546 mask:k,
547 ))
548}
549
550/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
551///
552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi8&expand=2328)
553#[inline]
554#[target_feature(enable = "avx512vbmi2")]
555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
556#[cfg_attr(test, assert_instr(vpexpandb))]
557pub unsafe fn _mm512_mask_expand_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
558 transmute(src:vpexpandb(a:a.as_i8x64(), src:src.as_i8x64(), mask:k))
559}
560
561/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
562///
563/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi8&expand=2329)
564#[inline]
565#[target_feature(enable = "avx512vbmi2")]
566#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
567#[cfg_attr(test, assert_instr(vpexpandb))]
568pub unsafe fn _mm512_maskz_expand_epi8(k: __mmask64, a: __m512i) -> __m512i {
569 transmute(src:vpexpandb(
570 a:a.as_i8x64(),
571 src:_mm512_setzero_si512().as_i8x64(),
572 mask:k,
573 ))
574}
575
576/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
577///
578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi8&expand=2326)
579#[inline]
580#[target_feature(enable = "avx512vbmi2,avx512vl")]
581#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
582#[cfg_attr(test, assert_instr(vpexpandb))]
583pub unsafe fn _mm256_mask_expand_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
584 transmute(src:vpexpandb256(a:a.as_i8x32(), src:src.as_i8x32(), mask:k))
585}
586
587/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
588///
589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi8&expand=2327)
590#[inline]
591#[target_feature(enable = "avx512vbmi2,avx512vl")]
592#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
593#[cfg_attr(test, assert_instr(vpexpandb))]
594pub unsafe fn _mm256_maskz_expand_epi8(k: __mmask32, a: __m256i) -> __m256i {
595 transmute(src:vpexpandb256(
596 a:a.as_i8x32(),
597 src:_mm256_setzero_si256().as_i8x32(),
598 mask:k,
599 ))
600}
601
602/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
603///
604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi8&expand=2324)
605#[inline]
606#[target_feature(enable = "avx512vbmi2,avx512vl")]
607#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
608#[cfg_attr(test, assert_instr(vpexpandb))]
609pub unsafe fn _mm_mask_expand_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
610 transmute(src:vpexpandb128(a:a.as_i8x16(), src:src.as_i8x16(), mask:k))
611}
612
613/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
614///
615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi8&expand=2325)
616#[inline]
617#[target_feature(enable = "avx512vbmi2,avx512vl")]
618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
619#[cfg_attr(test, assert_instr(vpexpandb))]
620pub unsafe fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i {
621 transmute(src:vpexpandb128(
622 a:a.as_i8x16(),
623 src:_mm_setzero_si128().as_i8x16(),
624 mask:k,
625 ))
626}
627
628/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
629///
630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi64&expand=5087)
631#[inline]
632#[target_feature(enable = "avx512vbmi2")]
633#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
634#[cfg_attr(test, assert_instr(vpshldvq))]
635pub unsafe fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
636 transmute(src:vpshldvq(a:a.as_i64x8(), b:b.as_i64x8(), c:c.as_i64x8()))
637}
638
639/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
640///
641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi64&expand=5085)
642#[inline]
643#[target_feature(enable = "avx512vbmi2")]
644#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
645#[cfg_attr(test, assert_instr(vpshldvq))]
646pub unsafe fn _mm512_mask_shldv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
647 let shf: i64x8 = _mm512_shldv_epi64(a, b, c).as_i64x8();
648 transmute(src:simd_select_bitmask(m:k, yes:shf, no:a.as_i64x8()))
649}
650
651/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
652///
653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi64&expand=5086)
654#[inline]
655#[target_feature(enable = "avx512vbmi2")]
656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
657#[cfg_attr(test, assert_instr(vpshldvq))]
658pub unsafe fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
659 let shf: i64x8 = _mm512_shldv_epi64(a, b, c).as_i64x8();
660 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
661 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
662}
663
664/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
665///
666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi64&expand=5084)
667#[inline]
668#[target_feature(enable = "avx512vbmi2,avx512vl")]
669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
670#[cfg_attr(test, assert_instr(vpshldvq))]
671pub unsafe fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
672 transmute(src:vpshldvq256(a:a.as_i64x4(), b:b.as_i64x4(), c:c.as_i64x4()))
673}
674
675/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
676///
677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi64&expand=5082)
678#[inline]
679#[target_feature(enable = "avx512vbmi2,avx512vl")]
680#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
681#[cfg_attr(test, assert_instr(vpshldvq))]
682pub unsafe fn _mm256_mask_shldv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
683 let shf: i64x4 = _mm256_shldv_epi64(a, b, c).as_i64x4();
684 transmute(src:simd_select_bitmask(m:k, yes:shf, no:a.as_i64x4()))
685}
686
687/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
688///
689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi64&expand=5083)
690#[inline]
691#[target_feature(enable = "avx512vbmi2,avx512vl")]
692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
693#[cfg_attr(test, assert_instr(vpshldvq))]
694pub unsafe fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
695 let shf: i64x4 = _mm256_shldv_epi64(a, b, c).as_i64x4();
696 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
697 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
698}
699
700/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
701///
702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi64&expand=5081)
703#[inline]
704#[target_feature(enable = "avx512vbmi2,avx512vl")]
705#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
706#[cfg_attr(test, assert_instr(vpshldvq))]
707pub unsafe fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
708 transmute(src:vpshldvq128(a:a.as_i64x2(), b:b.as_i64x2(), c:c.as_i64x2()))
709}
710
711/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
712///
713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi64&expand=5079)
714#[inline]
715#[target_feature(enable = "avx512vbmi2,avx512vl")]
716#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
717#[cfg_attr(test, assert_instr(vpshldvq))]
718pub unsafe fn _mm_mask_shldv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
719 let shf: i64x2 = _mm_shldv_epi64(a, b, c).as_i64x2();
720 transmute(src:simd_select_bitmask(m:k, yes:shf, no:a.as_i64x2()))
721}
722
723/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
724///
725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi64&expand=5080)
726#[inline]
727#[target_feature(enable = "avx512vbmi2,avx512vl")]
728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
729#[cfg_attr(test, assert_instr(vpshldvq))]
730pub unsafe fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
731 let shf: i64x2 = _mm_shldv_epi64(a, b, c).as_i64x2();
732 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
733 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
734}
735
736/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
737///
738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi32&expand=5078)
739#[inline]
740#[target_feature(enable = "avx512vbmi2")]
741#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
742#[cfg_attr(test, assert_instr(vpshldvd))]
743pub unsafe fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
744 transmute(src:vpshldvd(a:a.as_i32x16(), b:b.as_i32x16(), c:c.as_i32x16()))
745}
746
747/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
748///
749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi32&expand=5076)
750#[inline]
751#[target_feature(enable = "avx512vbmi2")]
752#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
753#[cfg_attr(test, assert_instr(vpshldvd))]
754pub unsafe fn _mm512_mask_shldv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
755 let shf: i32x16 = _mm512_shldv_epi32(a, b, c).as_i32x16();
756 transmute(src:simd_select_bitmask(m:k, yes:shf, no:a.as_i32x16()))
757}
758
759/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
760///
761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi32&expand=5077)
762#[inline]
763#[target_feature(enable = "avx512vbmi2")]
764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
765#[cfg_attr(test, assert_instr(vpshldvd))]
766pub unsafe fn _mm512_maskz_shldv_epi32(
767 k: __mmask16,
768 a: __m512i,
769 b: __m512i,
770 c: __m512i,
771) -> __m512i {
772 let shf: i32x16 = _mm512_shldv_epi32(a, b, c).as_i32x16();
773 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
774 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
775}
776
777/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
778///
779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi32&expand=5075)
780#[inline]
781#[target_feature(enable = "avx512vbmi2,avx512vl")]
782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
783#[cfg_attr(test, assert_instr(vpshldvd))]
784pub unsafe fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
785 transmute(src:vpshldvd256(a:a.as_i32x8(), b:b.as_i32x8(), c:c.as_i32x8()))
786}
787
788/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
789///
790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi32&expand=5073)
791#[inline]
792#[target_feature(enable = "avx512vbmi2,avx512vl")]
793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
794#[cfg_attr(test, assert_instr(vpshldvd))]
795pub unsafe fn _mm256_mask_shldv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
796 let shf: i32x8 = _mm256_shldv_epi32(a, b, c).as_i32x8();
797 transmute(src:simd_select_bitmask(m:k, yes:shf, no:a.as_i32x8()))
798}
799
800/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
801///
802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi32&expand=5074)
803#[inline]
804#[target_feature(enable = "avx512vbmi2,avx512vl")]
805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
806#[cfg_attr(test, assert_instr(vpshldvd))]
807pub unsafe fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
808 let shf: i32x8 = _mm256_shldv_epi32(a, b, c).as_i32x8();
809 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
810 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
811}
812
813/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
814///
815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi32&expand=5072)
816#[inline]
817#[target_feature(enable = "avx512vbmi2,avx512vl")]
818#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
819#[cfg_attr(test, assert_instr(vpshldvd))]
820pub unsafe fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
821 transmute(src:vpshldvd128(a:a.as_i32x4(), b:b.as_i32x4(), c:c.as_i32x4()))
822}
823
824/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
825///
826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi32&expand=5070)
827#[inline]
828#[target_feature(enable = "avx512vbmi2,avx512vl")]
829#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
830#[cfg_attr(test, assert_instr(vpshldvd))]
831pub unsafe fn _mm_mask_shldv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
832 let shf: i32x4 = _mm_shldv_epi32(a, b, c).as_i32x4();
833 transmute(src:simd_select_bitmask(m:k, yes:shf, no:a.as_i32x4()))
834}
835
836/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
837///
838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi32&expand=5071)
839#[inline]
840#[target_feature(enable = "avx512vbmi2,avx512vl")]
841#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
842#[cfg_attr(test, assert_instr(vpshldvd))]
843pub unsafe fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
844 let shf: i32x4 = _mm_shldv_epi32(a, b, c).as_i32x4();
845 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
846 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
847}
848
849/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
850///
851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi16&expand=5069)
852#[inline]
853#[target_feature(enable = "avx512vbmi2")]
854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
855#[cfg_attr(test, assert_instr(vpshldvw))]
856pub unsafe fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
857 transmute(src:vpshldvw(a:a.as_i16x32(), b:b.as_i16x32(), c:c.as_i16x32()))
858}
859
860/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
861///
862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi16&expand=5067)
863#[inline]
864#[target_feature(enable = "avx512vbmi2")]
865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
866#[cfg_attr(test, assert_instr(vpshldvw))]
867pub unsafe fn _mm512_mask_shldv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
868 let shf: i16x32 = _mm512_shldv_epi16(a, b, c).as_i16x32();
869 transmute(src:simd_select_bitmask(m:k, yes:shf, no:a.as_i16x32()))
870}
871
872/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
873///
874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi16&expand=5068)
875#[inline]
876#[target_feature(enable = "avx512vbmi2")]
877#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
878#[cfg_attr(test, assert_instr(vpshldvw))]
879pub unsafe fn _mm512_maskz_shldv_epi16(
880 k: __mmask32,
881 a: __m512i,
882 b: __m512i,
883 c: __m512i,
884) -> __m512i {
885 let shf: i16x32 = _mm512_shldv_epi16(a, b, c).as_i16x32();
886 let zero: i16x32 = _mm512_setzero_si512().as_i16x32();
887 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
888}
889
890/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
891///
892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi16&expand=5066)
893#[inline]
894#[target_feature(enable = "avx512vbmi2,avx512vl")]
895#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
896#[cfg_attr(test, assert_instr(vpshldvw))]
897pub unsafe fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
898 transmute(src:vpshldvw256(a:a.as_i16x16(), b:b.as_i16x16(), c:c.as_i16x16()))
899}
900
901/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
902///
903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi16&expand=5064)
904#[inline]
905#[target_feature(enable = "avx512vbmi2,avx512vl")]
906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
907#[cfg_attr(test, assert_instr(vpshldvw))]
908pub unsafe fn _mm256_mask_shldv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
909 let shf: i16x16 = _mm256_shldv_epi16(a, b, c).as_i16x16();
910 transmute(src:simd_select_bitmask(m:k, yes:shf, no:a.as_i16x16()))
911}
912
913/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
914///
915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi16&expand=5065)
916#[inline]
917#[target_feature(enable = "avx512vbmi2,avx512vl")]
918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
919#[cfg_attr(test, assert_instr(vpshldvw))]
920pub unsafe fn _mm256_maskz_shldv_epi16(
921 k: __mmask16,
922 a: __m256i,
923 b: __m256i,
924 c: __m256i,
925) -> __m256i {
926 let shf: i16x16 = _mm256_shldv_epi16(a, b, c).as_i16x16();
927 let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
928 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
929}
930
931/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
932///
933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi16&expand=5063)
934#[inline]
935#[target_feature(enable = "avx512vbmi2,avx512vl")]
936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
937#[cfg_attr(test, assert_instr(vpshldvw))]
938pub unsafe fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
939 transmute(src:vpshldvw128(a:a.as_i16x8(), b:b.as_i16x8(), c:c.as_i16x8()))
940}
941
942/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
943///
944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi16&expand=5061)
945#[inline]
946#[target_feature(enable = "avx512vbmi2,avx512vl")]
947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
948#[cfg_attr(test, assert_instr(vpshldvw))]
949pub unsafe fn _mm_mask_shldv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
950 let shf: i16x8 = _mm_shldv_epi16(a, b, c).as_i16x8();
951 transmute(src:simd_select_bitmask(m:k, yes:shf, no:a.as_i16x8()))
952}
953
954/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
955///
956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi16&expand=5062)
957#[inline]
958#[target_feature(enable = "avx512vbmi2,avx512vl")]
959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
960#[cfg_attr(test, assert_instr(vpshldvw))]
961pub unsafe fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
962 let shf: i16x8 = _mm_shldv_epi16(a, b, c).as_i16x8();
963 let zero: i16x8 = _mm_setzero_si128().as_i16x8();
964 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
965}
966
967/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
968///
969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi64&expand=5141)
970#[inline]
971#[target_feature(enable = "avx512vbmi2")]
972#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
973#[cfg_attr(test, assert_instr(vpshrdvq))]
974pub unsafe fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
975 transmute(src:vpshrdvq(a:a.as_i64x8(), b:b.as_i64x8(), c:c.as_i64x8()))
976}
977
978/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
979///
980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi64&expand=5139)
981#[inline]
982#[target_feature(enable = "avx512vbmi2")]
983#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
984#[cfg_attr(test, assert_instr(vpshrdvq))]
985pub unsafe fn _mm512_mask_shrdv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
986 let shf: i64x8 = _mm512_shrdv_epi64(a, b, c).as_i64x8();
987 transmute(src:simd_select_bitmask(m:k, yes:shf, no:a.as_i64x8()))
988}
989
990/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
991///
992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi64&expand=5140)
993#[inline]
994#[target_feature(enable = "avx512vbmi2")]
995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
996#[cfg_attr(test, assert_instr(vpshrdvq))]
997pub unsafe fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
998 let shf: i64x8 = _mm512_shrdv_epi64(a, b, c).as_i64x8();
999 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
1000 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1001}
1002
1003/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
1004///
1005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi64&expand=5138)
1006#[inline]
1007#[target_feature(enable = "avx512vbmi2,avx512vl")]
1008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1009#[cfg_attr(test, assert_instr(vpshrdvq))]
1010pub unsafe fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
1011 transmute(src:vpshrdvq256(a:a.as_i64x4(), b:b.as_i64x4(), c:c.as_i64x4()))
1012}
1013
1014/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1015///
1016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi64&expand=5136)
1017#[inline]
1018#[target_feature(enable = "avx512vbmi2,avx512vl")]
1019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1020#[cfg_attr(test, assert_instr(vpshrdvq))]
1021pub unsafe fn _mm256_mask_shrdv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
1022 let shf: i64x4 = _mm256_shrdv_epi64(a, b, c).as_i64x4();
1023 transmute(src:simd_select_bitmask(m:k, yes:shf, no:a.as_i64x4()))
1024}
1025
1026/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1027///
1028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi64&expand=5137)
1029#[inline]
1030#[target_feature(enable = "avx512vbmi2,avx512vl")]
1031#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1032#[cfg_attr(test, assert_instr(vpshrdvq))]
1033pub unsafe fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
1034 let shf: i64x4 = _mm256_shrdv_epi64(a, b, c).as_i64x4();
1035 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
1036 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1037}
1038
1039/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
1040///
1041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi64&expand=5135)
1042#[inline]
1043#[target_feature(enable = "avx512vbmi2,avx512vl")]
1044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1045#[cfg_attr(test, assert_instr(vpshrdvq))]
1046pub unsafe fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1047 transmute(src:vpshrdvq128(a:a.as_i64x2(), b:b.as_i64x2(), c:c.as_i64x2()))
1048}
1049
1050/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1051///
1052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi64&expand=5133)
1053#[inline]
1054#[target_feature(enable = "avx512vbmi2,avx512vl")]
1055#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1056#[cfg_attr(test, assert_instr(vpshrdvq))]
1057pub unsafe fn _mm_mask_shrdv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
1058 let shf: i64x2 = _mm_shrdv_epi64(a, b, c).as_i64x2();
1059 transmute(src:simd_select_bitmask(m:k, yes:shf, no:a.as_i64x2()))
1060}
1061
1062/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1063///
1064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi64&expand=5134)
1065#[inline]
1066#[target_feature(enable = "avx512vbmi2,avx512vl")]
1067#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1068#[cfg_attr(test, assert_instr(vpshrdvq))]
1069pub unsafe fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1070 let shf: i64x2 = _mm_shrdv_epi64(a, b, c).as_i64x2();
1071 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
1072 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1073}
1074
1075/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
1076///
1077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi32&expand=5132)
1078#[inline]
1079#[target_feature(enable = "avx512vbmi2")]
1080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1081#[cfg_attr(test, assert_instr(vpshrdvd))]
1082pub unsafe fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
1083 transmute(src:vpshrdvd(a:a.as_i32x16(), b:b.as_i32x16(), c:c.as_i32x16()))
1084}
1085
1086/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1087///
1088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi32&expand=5130)
1089#[inline]
1090#[target_feature(enable = "avx512vbmi2")]
1091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1092#[cfg_attr(test, assert_instr(vpshrdvd))]
1093pub unsafe fn _mm512_mask_shrdv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
1094 let shf: i32x16 = _mm512_shrdv_epi32(a, b, c).as_i32x16();
1095 transmute(src:simd_select_bitmask(m:k, yes:shf, no:a.as_i32x16()))
1096}
1097
1098/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1099///
1100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi32&expand=5131)
1101#[inline]
1102#[target_feature(enable = "avx512vbmi2")]
1103#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1104#[cfg_attr(test, assert_instr(vpshrdvd))]
1105pub unsafe fn _mm512_maskz_shrdv_epi32(
1106 k: __mmask16,
1107 a: __m512i,
1108 b: __m512i,
1109 c: __m512i,
1110) -> __m512i {
1111 let shf: i32x16 = _mm512_shrdv_epi32(a, b, c).as_i32x16();
1112 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
1113 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1114}
1115
1116/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
1117///
1118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi32&expand=5129)
1119#[inline]
1120#[target_feature(enable = "avx512vbmi2,avx512vl")]
1121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1122#[cfg_attr(test, assert_instr(vpshrdvd))]
1123pub unsafe fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
1124 transmute(src:vpshrdvd256(a:a.as_i32x8(), b:b.as_i32x8(), c:c.as_i32x8()))
1125}
1126
1127/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1128///
1129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi32&expand=5127)
1130#[inline]
1131#[target_feature(enable = "avx512vbmi2,avx512vl")]
1132#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1133#[cfg_attr(test, assert_instr(vpshrdvd))]
1134pub unsafe fn _mm256_mask_shrdv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
1135 let shf: i32x8 = _mm256_shrdv_epi32(a, b, c).as_i32x8();
1136 transmute(src:simd_select_bitmask(m:k, yes:shf, no:a.as_i32x8()))
1137}
1138
1139/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1140///
1141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi32&expand=5128)
1142#[inline]
1143#[target_feature(enable = "avx512vbmi2,avx512vl")]
1144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1145#[cfg_attr(test, assert_instr(vpshrdvd))]
1146pub unsafe fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
1147 let shf: i32x8 = _mm256_shrdv_epi32(a, b, c).as_i32x8();
1148 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
1149 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1150}
1151
1152/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
1153///
1154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi32&expand=5126)
1155#[inline]
1156#[target_feature(enable = "avx512vbmi2,avx512vl")]
1157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1158#[cfg_attr(test, assert_instr(vpshrdvd))]
1159pub unsafe fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1160 transmute(src:vpshrdvd128(a:a.as_i32x4(), b:b.as_i32x4(), c:c.as_i32x4()))
1161}
1162
1163/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1164///
1165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi32&expand=5124)
1166#[inline]
1167#[target_feature(enable = "avx512vbmi2,avx512vl")]
1168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1169#[cfg_attr(test, assert_instr(vpshrdvd))]
1170pub unsafe fn _mm_mask_shrdv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
1171 let shf: i32x4 = _mm_shrdv_epi32(a, b, c).as_i32x4();
1172 transmute(src:simd_select_bitmask(m:k, yes:shf, no:a.as_i32x4()))
1173}
1174
1175/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1176///
1177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi32&expand=5125)
1178#[inline]
1179#[target_feature(enable = "avx512vbmi2,avx512vl")]
1180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1181#[cfg_attr(test, assert_instr(vpshrdvd))]
1182pub unsafe fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1183 let shf: i32x4 = _mm_shrdv_epi32(a, b, c).as_i32x4();
1184 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
1185 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1186}
1187
1188/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
1189///
1190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi16&expand=5123)
1191#[inline]
1192#[target_feature(enable = "avx512vbmi2")]
1193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1194#[cfg_attr(test, assert_instr(vpshrdvw))]
1195pub unsafe fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
1196 transmute(src:vpshrdvw(a:a.as_i16x32(), b:b.as_i16x32(), c:c.as_i16x32()))
1197}
1198
1199/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1200///
1201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi16&expand=5121)
1202#[inline]
1203#[target_feature(enable = "avx512vbmi2")]
1204#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1205#[cfg_attr(test, assert_instr(vpshrdvw))]
1206pub unsafe fn _mm512_mask_shrdv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
1207 let shf: i16x32 = _mm512_shrdv_epi16(a, b, c).as_i16x32();
1208 transmute(src:simd_select_bitmask(m:k, yes:shf, no:a.as_i16x32()))
1209}
1210
1211/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1212///
1213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi16&expand=5122)
1214#[inline]
1215#[target_feature(enable = "avx512vbmi2")]
1216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1217#[cfg_attr(test, assert_instr(vpshrdvw))]
1218pub unsafe fn _mm512_maskz_shrdv_epi16(
1219 k: __mmask32,
1220 a: __m512i,
1221 b: __m512i,
1222 c: __m512i,
1223) -> __m512i {
1224 let shf: i16x32 = _mm512_shrdv_epi16(a, b, c).as_i16x32();
1225 let zero: i16x32 = _mm512_setzero_si512().as_i16x32();
1226 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1227}
1228
1229/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
1230///
1231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi16&expand=5120)
1232#[inline]
1233#[target_feature(enable = "avx512vbmi2,avx512vl")]
1234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1235#[cfg_attr(test, assert_instr(vpshrdvw))]
1236pub unsafe fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
1237 transmute(src:vpshrdvw256(a:a.as_i16x16(), b:b.as_i16x16(), c:c.as_i16x16()))
1238}
1239
1240/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1241///
1242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi16&expand=5118)
1243#[inline]
1244#[target_feature(enable = "avx512vbmi2,avx512vl")]
1245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1246#[cfg_attr(test, assert_instr(vpshrdvw))]
1247pub unsafe fn _mm256_mask_shrdv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
1248 let shf: i16x16 = _mm256_shrdv_epi16(a, b, c).as_i16x16();
1249 transmute(src:simd_select_bitmask(m:k, yes:shf, no:a.as_i16x16()))
1250}
1251
1252/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1253///
1254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi16&expand=5119)
1255#[inline]
1256#[target_feature(enable = "avx512vbmi2,avx512vl")]
1257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1258#[cfg_attr(test, assert_instr(vpshrdvw))]
1259pub unsafe fn _mm256_maskz_shrdv_epi16(
1260 k: __mmask16,
1261 a: __m256i,
1262 b: __m256i,
1263 c: __m256i,
1264) -> __m256i {
1265 let shf: i16x16 = _mm256_shrdv_epi16(a, b, c).as_i16x16();
1266 let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
1267 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1268}
1269
1270/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
1271///
1272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi16&expand=5117)
1273#[inline]
1274#[target_feature(enable = "avx512vbmi2,avx512vl")]
1275#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1276#[cfg_attr(test, assert_instr(vpshrdvw))]
1277pub unsafe fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1278 transmute(src:vpshrdvw128(a:a.as_i16x8(), b:b.as_i16x8(), c:c.as_i16x8()))
1279}
1280
1281/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1282///
1283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi16&expand=5115)
1284#[inline]
1285#[target_feature(enable = "avx512vbmi2,avx512vl")]
1286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1287#[cfg_attr(test, assert_instr(vpshrdvw))]
1288pub unsafe fn _mm_mask_shrdv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
1289 let shf: i16x8 = _mm_shrdv_epi16(a, b, c).as_i16x8();
1290 transmute(src:simd_select_bitmask(m:k, yes:shf, no:a.as_i16x8()))
1291}
1292
1293/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1294///
1295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi16&expand=5116)
1296#[inline]
1297#[target_feature(enable = "avx512vbmi2,avx512vl")]
1298#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1299#[cfg_attr(test, assert_instr(vpshrdvw))]
1300pub unsafe fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1301 let shf: i16x8 = _mm_shrdv_epi16(a, b, c).as_i16x8();
1302 let zero: i16x8 = _mm_setzero_si128().as_i16x8();
1303 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1304}
1305
1306/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst).
1307///
1308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi64&expand=5060)
1309#[inline]
1310#[target_feature(enable = "avx512vbmi2")]
1311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1312#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1313#[rustc_legacy_const_generics(2)]
1314pub unsafe fn _mm512_shldi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1315 static_assert_uimm_bits!(IMM8, 8);
1316 let imm8: i64 = IMM8 as i64;
1317 transmute(src:vpshldvq(
1318 a:a.as_i64x8(),
1319 b:b.as_i64x8(),
1320 c:_mm512_set1_epi64(imm8).as_i64x8(),
1321 ))
1322}
1323
1324/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1325///
1326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi64&expand=5058)
1327#[inline]
1328#[target_feature(enable = "avx512vbmi2")]
1329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1330#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1331#[rustc_legacy_const_generics(4)]
1332pub unsafe fn _mm512_mask_shldi_epi64<const IMM8: i32>(
1333 src: __m512i,
1334 k: __mmask8,
1335 a: __m512i,
1336 b: __m512i,
1337) -> __m512i {
1338 static_assert_uimm_bits!(IMM8, 8);
1339 let imm8: i64 = IMM8 as i64;
1340 let shf: i64x8 = vpshldvq(
1341 a:a.as_i64x8(),
1342 b:b.as_i64x8(),
1343 c:_mm512_set1_epi64(imm8).as_i64x8(),
1344 );
1345 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x8()))
1346}
1347
1348/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1349///
1350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi64&expand=5059)
1351#[inline]
1352#[target_feature(enable = "avx512vbmi2")]
1353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1354#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1355#[rustc_legacy_const_generics(3)]
1356pub unsafe fn _mm512_maskz_shldi_epi64<const IMM8: i32>(
1357 k: __mmask8,
1358 a: __m512i,
1359 b: __m512i,
1360) -> __m512i {
1361 static_assert_uimm_bits!(IMM8, 8);
1362 let imm8: i64 = IMM8 as i64;
1363 let shf: i64x8 = vpshldvq(
1364 a:a.as_i64x8(),
1365 b:b.as_i64x8(),
1366 c:_mm512_set1_epi64(imm8).as_i64x8(),
1367 );
1368 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
1369 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1370}
1371
1372/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst).
1373///
1374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi64&expand=5057)
1375#[inline]
1376#[target_feature(enable = "avx512vbmi2,avx512vl")]
1377#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1378#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1379#[rustc_legacy_const_generics(2)]
1380pub unsafe fn _mm256_shldi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1381 static_assert_uimm_bits!(IMM8, 8);
1382 let imm8: i64 = IMM8 as i64;
1383 transmute(src:vpshldvq256(
1384 a:a.as_i64x4(),
1385 b:b.as_i64x4(),
1386 c:_mm256_set1_epi64x(imm8).as_i64x4(),
1387 ))
1388}
1389
1390/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1391///
1392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi64&expand=5055)
1393#[inline]
1394#[target_feature(enable = "avx512vbmi2,avx512vl")]
1395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1396#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1397#[rustc_legacy_const_generics(4)]
1398pub unsafe fn _mm256_mask_shldi_epi64<const IMM8: i32>(
1399 src: __m256i,
1400 k: __mmask8,
1401 a: __m256i,
1402 b: __m256i,
1403) -> __m256i {
1404 static_assert_uimm_bits!(IMM8, 8);
1405 let imm8: i64 = IMM8 as i64;
1406 let shf: i64x4 = vpshldvq256(
1407 a:a.as_i64x4(),
1408 b:b.as_i64x4(),
1409 c:_mm256_set1_epi64x(imm8).as_i64x4(),
1410 );
1411 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x4()))
1412}
1413
1414/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1415///
1416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi64&expand=5056)
1417#[inline]
1418#[target_feature(enable = "avx512vbmi2,avx512vl")]
1419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1420#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1421#[rustc_legacy_const_generics(3)]
1422pub unsafe fn _mm256_maskz_shldi_epi64<const IMM8: i32>(
1423 k: __mmask8,
1424 a: __m256i,
1425 b: __m256i,
1426) -> __m256i {
1427 static_assert_uimm_bits!(IMM8, 8);
1428 let imm8: i64 = IMM8 as i64;
1429 let shf: i64x4 = vpshldvq256(
1430 a:a.as_i64x4(),
1431 b:b.as_i64x4(),
1432 c:_mm256_set1_epi64x(imm8).as_i64x4(),
1433 );
1434 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
1435 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1436}
1437
1438/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst).
1439///
1440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi64&expand=5054)
1441#[inline]
1442#[target_feature(enable = "avx512vbmi2,avx512vl")]
1443#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1444#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1445#[rustc_legacy_const_generics(2)]
1446pub unsafe fn _mm_shldi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
1447 static_assert_uimm_bits!(IMM8, 8);
1448 let imm8: i64 = IMM8 as i64;
1449 transmute(src:vpshldvq128(
1450 a:a.as_i64x2(),
1451 b:b.as_i64x2(),
1452 c:_mm_set1_epi64x(imm8).as_i64x2(),
1453 ))
1454}
1455
1456/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1457///
1458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi64&expand=5052)
1459#[inline]
1460#[target_feature(enable = "avx512vbmi2,avx512vl")]
1461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1462#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1463#[rustc_legacy_const_generics(4)]
1464pub unsafe fn _mm_mask_shldi_epi64<const IMM8: i32>(
1465 src: __m128i,
1466 k: __mmask8,
1467 a: __m128i,
1468 b: __m128i,
1469) -> __m128i {
1470 static_assert_uimm_bits!(IMM8, 8);
1471 let imm8: i64 = IMM8 as i64;
1472 let shf: i64x2 = vpshldvq128(a:a.as_i64x2(), b:b.as_i64x2(), c:_mm_set1_epi64x(imm8).as_i64x2());
1473 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x2()))
1474}
1475
1476/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1477///
1478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi64&expand=5053)
1479#[inline]
1480#[target_feature(enable = "avx512vbmi2,avx512vl")]
1481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1482#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1483#[rustc_legacy_const_generics(3)]
1484pub unsafe fn _mm_maskz_shldi_epi64<const IMM8: i32>(
1485 k: __mmask8,
1486 a: __m128i,
1487 b: __m128i,
1488) -> __m128i {
1489 static_assert_uimm_bits!(IMM8, 8);
1490 let imm8: i64 = IMM8 as i64;
1491 let shf: i64x2 = vpshldvq128(a:a.as_i64x2(), b:b.as_i64x2(), c:_mm_set1_epi64x(imm8).as_i64x2());
1492 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
1493 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1494}
1495
1496/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
1497///
1498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi32&expand=5051)
1499#[inline]
1500#[target_feature(enable = "avx512vbmi2")]
1501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1502#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1503#[rustc_legacy_const_generics(2)]
1504pub unsafe fn _mm512_shldi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1505 static_assert_uimm_bits!(IMM8, 8);
1506 transmute(src:vpshldvd(
1507 a:a.as_i32x16(),
1508 b:b.as_i32x16(),
1509 c:_mm512_set1_epi32(IMM8).as_i32x16(),
1510 ))
1511}
1512
1513/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1514///
1515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi32&expand=5049)
1516#[inline]
1517#[target_feature(enable = "avx512vbmi2")]
1518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1519#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1520#[rustc_legacy_const_generics(4)]
1521pub unsafe fn _mm512_mask_shldi_epi32<const IMM8: i32>(
1522 src: __m512i,
1523 k: __mmask16,
1524 a: __m512i,
1525 b: __m512i,
1526) -> __m512i {
1527 static_assert_uimm_bits!(IMM8, 8);
1528 let shf: i32x16 = vpshldvd(
1529 a:a.as_i32x16(),
1530 b:b.as_i32x16(),
1531 c:_mm512_set1_epi32(IMM8).as_i32x16(),
1532 );
1533 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x16()))
1534}
1535
1536/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1537///
1538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi32&expand=5050)
1539#[inline]
1540#[target_feature(enable = "avx512vbmi2")]
1541#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1542#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1543#[rustc_legacy_const_generics(3)]
1544pub unsafe fn _mm512_maskz_shldi_epi32<const IMM8: i32>(
1545 k: __mmask16,
1546 a: __m512i,
1547 b: __m512i,
1548) -> __m512i {
1549 static_assert_uimm_bits!(IMM8, 8);
1550 let shf: i32x16 = vpshldvd(
1551 a:a.as_i32x16(),
1552 b:b.as_i32x16(),
1553 c:_mm512_set1_epi32(IMM8).as_i32x16(),
1554 );
1555 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
1556 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1557}
1558
1559/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
1560///
1561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi32&expand=5048)
1562#[inline]
1563#[target_feature(enable = "avx512vbmi2,avx512vl")]
1564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1565#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1566#[rustc_legacy_const_generics(2)]
1567pub unsafe fn _mm256_shldi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1568 static_assert_uimm_bits!(IMM8, 8);
1569 transmute(src:vpshldvd256(
1570 a:a.as_i32x8(),
1571 b:b.as_i32x8(),
1572 c:_mm256_set1_epi32(IMM8).as_i32x8(),
1573 ))
1574}
1575
1576/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1577///
1578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi32&expand=5046)
1579#[inline]
1580#[target_feature(enable = "avx512vbmi2,avx512vl")]
1581#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1582#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1583#[rustc_legacy_const_generics(4)]
1584pub unsafe fn _mm256_mask_shldi_epi32<const IMM8: i32>(
1585 src: __m256i,
1586 k: __mmask8,
1587 a: __m256i,
1588 b: __m256i,
1589) -> __m256i {
1590 static_assert_uimm_bits!(IMM8, 8);
1591 let shf: i32x8 = vpshldvd256(
1592 a:a.as_i32x8(),
1593 b:b.as_i32x8(),
1594 c:_mm256_set1_epi32(IMM8).as_i32x8(),
1595 );
1596 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x8()))
1597}
1598
1599/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1600///
1601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi32&expand=5047)
1602#[inline]
1603#[target_feature(enable = "avx512vbmi2,avx512vl")]
1604#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1605#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1606#[rustc_legacy_const_generics(3)]
1607pub unsafe fn _mm256_maskz_shldi_epi32<const IMM8: i32>(
1608 k: __mmask8,
1609 a: __m256i,
1610 b: __m256i,
1611) -> __m256i {
1612 static_assert_uimm_bits!(IMM8, 8);
1613 let shf: i32x8 = vpshldvd256(
1614 a:a.as_i32x8(),
1615 b:b.as_i32x8(),
1616 c:_mm256_set1_epi32(IMM8).as_i32x8(),
1617 );
1618 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
1619 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1620}
1621
1622/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
1623///
1624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi32&expand=5045)
1625#[inline]
1626#[target_feature(enable = "avx512vbmi2,avx512vl")]
1627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1628#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1629#[rustc_legacy_const_generics(2)]
1630pub unsafe fn _mm_shldi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
1631 static_assert_uimm_bits!(IMM8, 8);
1632 transmute(src:vpshldvd128(
1633 a:a.as_i32x4(),
1634 b:b.as_i32x4(),
1635 c:_mm_set1_epi32(IMM8).as_i32x4(),
1636 ))
1637}
1638
1639/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1640///
1641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi32&expand=5043)
1642#[inline]
1643#[target_feature(enable = "avx512vbmi2,avx512vl")]
1644#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1645#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1646#[rustc_legacy_const_generics(4)]
1647pub unsafe fn _mm_mask_shldi_epi32<const IMM8: i32>(
1648 src: __m128i,
1649 k: __mmask8,
1650 a: __m128i,
1651 b: __m128i,
1652) -> __m128i {
1653 static_assert_uimm_bits!(IMM8, 8);
1654 let shf: i32x4 = vpshldvd128(a:a.as_i32x4(), b:b.as_i32x4(), c:_mm_set1_epi32(IMM8).as_i32x4());
1655 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x4()))
1656}
1657
1658/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1659///
1660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi32&expand=5044)
1661#[inline]
1662#[target_feature(enable = "avx512vbmi2,avx512vl")]
1663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1664#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1665#[rustc_legacy_const_generics(3)]
1666pub unsafe fn _mm_maskz_shldi_epi32<const IMM8: i32>(
1667 k: __mmask8,
1668 a: __m128i,
1669 b: __m128i,
1670) -> __m128i {
1671 static_assert_uimm_bits!(IMM8, 8);
1672 let shf: i32x4 = vpshldvd128(a:a.as_i32x4(), b:b.as_i32x4(), c:_mm_set1_epi32(IMM8).as_i32x4());
1673 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
1674 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1675}
1676
1677/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst).
1678///
1679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi16&expand=5042)
1680#[inline]
1681#[target_feature(enable = "avx512vbmi2")]
1682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1683#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1684#[rustc_legacy_const_generics(2)]
1685pub unsafe fn _mm512_shldi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1686 static_assert_uimm_bits!(IMM8, 8);
1687 let imm8: i16 = IMM8 as i16;
1688 transmute(src:vpshldvw(
1689 a:a.as_i16x32(),
1690 b:b.as_i16x32(),
1691 c:_mm512_set1_epi16(imm8).as_i16x32(),
1692 ))
1693}
1694
1695/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1696///
1697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi16&expand=5040)
1698#[inline]
1699#[target_feature(enable = "avx512vbmi2")]
1700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1701#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1702#[rustc_legacy_const_generics(4)]
1703pub unsafe fn _mm512_mask_shldi_epi16<const IMM8: i32>(
1704 src: __m512i,
1705 k: __mmask32,
1706 a: __m512i,
1707 b: __m512i,
1708) -> __m512i {
1709 static_assert_uimm_bits!(IMM8, 8);
1710 let imm8: i16 = IMM8 as i16;
1711 let shf: i16x32 = vpshldvw(
1712 a:a.as_i16x32(),
1713 b:b.as_i16x32(),
1714 c:_mm512_set1_epi16(imm8).as_i16x32(),
1715 );
1716 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i16x32()))
1717}
1718
1719/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1720///
1721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi16&expand=5041)
1722#[inline]
1723#[target_feature(enable = "avx512vbmi2")]
1724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1725#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1726#[rustc_legacy_const_generics(3)]
1727pub unsafe fn _mm512_maskz_shldi_epi16<const IMM8: i32>(
1728 k: __mmask32,
1729 a: __m512i,
1730 b: __m512i,
1731) -> __m512i {
1732 static_assert_uimm_bits!(IMM8, 8);
1733 let imm8: i16 = IMM8 as i16;
1734 let shf: i16x32 = vpshldvw(
1735 a:a.as_i16x32(),
1736 b:b.as_i16x32(),
1737 c:_mm512_set1_epi16(imm8).as_i16x32(),
1738 );
1739 let zero: i16x32 = _mm512_setzero_si512().as_i16x32();
1740 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1741}
1742
1743/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst).
1744///
1745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi16&expand=5039)
1746#[inline]
1747#[target_feature(enable = "avx512vbmi2,avx512vl")]
1748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1749#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1750#[rustc_legacy_const_generics(2)]
1751pub unsafe fn _mm256_shldi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1752 static_assert_uimm_bits!(IMM8, 8);
1753 let imm8: i16 = IMM8 as i16;
1754 transmute(src:vpshldvw256(
1755 a:a.as_i16x16(),
1756 b:b.as_i16x16(),
1757 c:_mm256_set1_epi16(imm8).as_i16x16(),
1758 ))
1759}
1760
1761/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1762///
1763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi16&expand=5037)
1764#[inline]
1765#[target_feature(enable = "avx512vbmi2,avx512vl")]
1766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1767#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1768#[rustc_legacy_const_generics(4)]
1769pub unsafe fn _mm256_mask_shldi_epi16<const IMM8: i32>(
1770 src: __m256i,
1771 k: __mmask16,
1772 a: __m256i,
1773 b: __m256i,
1774) -> __m256i {
1775 static_assert_uimm_bits!(IMM8, 8);
1776 let imm8: i16 = IMM8 as i16;
1777 let shf: i16x16 = vpshldvw256(
1778 a:a.as_i16x16(),
1779 b:b.as_i16x16(),
1780 c:_mm256_set1_epi16(imm8).as_i16x16(),
1781 );
1782 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i16x16()))
1783}
1784
1785/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1786///
1787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi16&expand=5038)
1788#[inline]
1789#[target_feature(enable = "avx512vbmi2,avx512vl")]
1790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1791#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1792#[rustc_legacy_const_generics(3)]
1793pub unsafe fn _mm256_maskz_shldi_epi16<const IMM8: i32>(
1794 k: __mmask16,
1795 a: __m256i,
1796 b: __m256i,
1797) -> __m256i {
1798 static_assert_uimm_bits!(IMM8, 8);
1799 let imm8: i16 = IMM8 as i16;
1800 let shf: i16x16 = vpshldvw256(
1801 a:a.as_i16x16(),
1802 b:b.as_i16x16(),
1803 c:_mm256_set1_epi16(imm8).as_i16x16(),
1804 );
1805 let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
1806 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1807}
1808
1809/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst).
1810///
1811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi16&expand=5036)
1812#[inline]
1813#[target_feature(enable = "avx512vbmi2,avx512vl")]
1814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1815#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1816#[rustc_legacy_const_generics(2)]
1817pub unsafe fn _mm_shldi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
1818 static_assert_uimm_bits!(IMM8, 8);
1819 let imm8: i16 = IMM8 as i16;
1820 transmute(src:vpshldvw128(
1821 a:a.as_i16x8(),
1822 b:b.as_i16x8(),
1823 c:_mm_set1_epi16(imm8).as_i16x8(),
1824 ))
1825}
1826
1827/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1828///
1829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi16&expand=5034)
1830#[inline]
1831#[target_feature(enable = "avx512vbmi2,avx512vl")]
1832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1833#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1834#[rustc_legacy_const_generics(4)]
1835pub unsafe fn _mm_mask_shldi_epi16<const IMM8: i32>(
1836 src: __m128i,
1837 k: __mmask8,
1838 a: __m128i,
1839 b: __m128i,
1840) -> __m128i {
1841 static_assert_uimm_bits!(IMM8, 8);
1842 let imm8: i16 = IMM8 as i16;
1843 let shf: i16x8 = vpshldvw128(a:a.as_i16x8(), b:b.as_i16x8(), c:_mm_set1_epi16(imm8).as_i16x8());
1844 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i16x8()))
1845}
1846
1847/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1848///
1849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi16&expand=5035)
1850#[inline]
1851#[target_feature(enable = "avx512vbmi2,avx512vl")]
1852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1853#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1854#[rustc_legacy_const_generics(3)]
1855pub unsafe fn _mm_maskz_shldi_epi16<const IMM8: i32>(
1856 k: __mmask8,
1857 a: __m128i,
1858 b: __m128i,
1859) -> __m128i {
1860 static_assert_uimm_bits!(IMM8, 8);
1861 let imm8: i16 = IMM8 as i16;
1862 let shf: i16x8 = vpshldvw128(a:a.as_i16x8(), b:b.as_i16x8(), c:_mm_set1_epi16(imm8).as_i16x8());
1863 let zero: i16x8 = _mm_setzero_si128().as_i16x8();
1864 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1865}
1866
1867/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
1868///
1869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi64&expand=5114)
1870#[inline]
1871#[target_feature(enable = "avx512vbmi2")]
1872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1873#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1874#[rustc_legacy_const_generics(2)]
1875pub unsafe fn _mm512_shrdi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1876 static_assert_uimm_bits!(IMM8, 8);
1877 let imm8: i64 = IMM8 as i64;
1878 transmute(src:vpshrdvq(
1879 a:a.as_i64x8(),
1880 b:b.as_i64x8(),
1881 c:_mm512_set1_epi64(imm8).as_i64x8(),
1882 ))
1883}
1884
1885/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src" when the corresponding mask bit is not set).
1886///
1887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi64&expand=5112)
1888#[inline]
1889#[target_feature(enable = "avx512vbmi2")]
1890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1891#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1892#[rustc_legacy_const_generics(4)]
1893pub unsafe fn _mm512_mask_shrdi_epi64<const IMM8: i32>(
1894 src: __m512i,
1895 k: __mmask8,
1896 a: __m512i,
1897 b: __m512i,
1898) -> __m512i {
1899 static_assert_uimm_bits!(IMM8, 8);
1900 let imm8: i64 = IMM8 as i64;
1901 let shf: i64x8 = vpshrdvq(
1902 a:a.as_i64x8(),
1903 b:b.as_i64x8(),
1904 c:_mm512_set1_epi64(imm8).as_i64x8(),
1905 );
1906 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x8()))
1907}
1908
1909/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1910///
1911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi64&expand=5113)
1912#[inline]
1913#[target_feature(enable = "avx512vbmi2")]
1914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1915#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 255))] //should be vpshrdq
1916#[rustc_legacy_const_generics(3)]
1917pub unsafe fn _mm512_maskz_shrdi_epi64<const IMM8: i32>(
1918 k: __mmask8,
1919 a: __m512i,
1920 b: __m512i,
1921) -> __m512i {
1922 static_assert_uimm_bits!(IMM8, 8);
1923 let imm8: i64 = IMM8 as i64;
1924 let shf: i64x8 = vpshrdvq(
1925 a:a.as_i64x8(),
1926 b:b.as_i64x8(),
1927 c:_mm512_set1_epi64(imm8).as_i64x8(),
1928 );
1929 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
1930 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1931}
1932
1933/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
1934///
1935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi64&expand=5111)
1936#[inline]
1937#[target_feature(enable = "avx512vbmi2,avx512vl")]
1938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1939#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1940#[rustc_legacy_const_generics(2)]
1941pub unsafe fn _mm256_shrdi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1942 static_assert_uimm_bits!(IMM8, 8);
1943 let imm8: i64 = IMM8 as i64;
1944 transmute(src:vpshrdvq256(
1945 a:a.as_i64x4(),
1946 b:b.as_i64x4(),
1947 c:_mm256_set1_epi64x(imm8).as_i64x4(),
1948 ))
1949}
1950
1951/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src" when the corresponding mask bit is not set).
1952///
1953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi64&expand=5109)
1954#[inline]
1955#[target_feature(enable = "avx512vbmi2,avx512vl")]
1956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1957#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1958#[rustc_legacy_const_generics(4)]
1959pub unsafe fn _mm256_mask_shrdi_epi64<const IMM8: i32>(
1960 src: __m256i,
1961 k: __mmask8,
1962 a: __m256i,
1963 b: __m256i,
1964) -> __m256i {
1965 static_assert_uimm_bits!(IMM8, 8);
1966 let imm8: i64 = IMM8 as i64;
1967 let shf: i64x4 = vpshrdvq256(
1968 a:a.as_i64x4(),
1969 b:b.as_i64x4(),
1970 c:_mm256_set1_epi64x(imm8).as_i64x4(),
1971 );
1972 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x4()))
1973}
1974
1975/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1976///
1977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi64&expand=5110)
1978#[inline]
1979#[target_feature(enable = "avx512vbmi2,avx512vl")]
1980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1981#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1982#[rustc_legacy_const_generics(3)]
1983pub unsafe fn _mm256_maskz_shrdi_epi64<const IMM8: i32>(
1984 k: __mmask8,
1985 a: __m256i,
1986 b: __m256i,
1987) -> __m256i {
1988 static_assert_uimm_bits!(IMM8, 8);
1989 let imm8: i64 = IMM8 as i64;
1990 let shf: i64x4 = vpshrdvq256(
1991 a:a.as_i64x4(),
1992 b:b.as_i64x4(),
1993 c:_mm256_set1_epi64x(imm8).as_i64x4(),
1994 );
1995 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
1996 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
1997}
1998
1999/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
2000///
2001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi64&expand=5108)
2002#[inline]
2003#[target_feature(enable = "avx512vbmi2,avx512vl")]
2004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2005#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
2006#[rustc_legacy_const_generics(2)]
2007pub unsafe fn _mm_shrdi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
2008 static_assert_uimm_bits!(IMM8, 8);
2009 let imm8: i64 = IMM8 as i64;
2010 transmute(src:vpshrdvq128(
2011 a:a.as_i64x2(),
2012 b:b.as_i64x2(),
2013 c:_mm_set1_epi64x(imm8).as_i64x2(),
2014 ))
2015}
2016
2017/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src" when the corresponding mask bit is not set).
2018///
2019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi64&expand=5106)
2020#[inline]
2021#[target_feature(enable = "avx512vbmi2,avx512vl")]
2022#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2023#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
2024#[rustc_legacy_const_generics(4)]
2025pub unsafe fn _mm_mask_shrdi_epi64<const IMM8: i32>(
2026 src: __m128i,
2027 k: __mmask8,
2028 a: __m128i,
2029 b: __m128i,
2030) -> __m128i {
2031 static_assert_uimm_bits!(IMM8, 8);
2032 let imm8: i64 = IMM8 as i64;
2033 let shf: i64x2 = vpshrdvq128(a:a.as_i64x2(), b:b.as_i64x2(), c:_mm_set1_epi64x(imm8).as_i64x2());
2034 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x2()))
2035}
2036
2037/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2038///
2039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi64&expand=5107)
2040#[inline]
2041#[target_feature(enable = "avx512vbmi2,avx512vl")]
2042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2043#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
2044#[rustc_legacy_const_generics(3)]
2045pub unsafe fn _mm_maskz_shrdi_epi64<const IMM8: i32>(
2046 k: __mmask8,
2047 a: __m128i,
2048 b: __m128i,
2049) -> __m128i {
2050 static_assert_uimm_bits!(IMM8, 8);
2051 let imm8: i64 = IMM8 as i64;
2052 let shf: i64x2 = vpshrdvq128(a:a.as_i64x2(), b:b.as_i64x2(), c:_mm_set1_epi64x(imm8).as_i64x2());
2053 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
2054 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
2055}
2056
2057/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
2058///
2059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi32&expand=5105)
2060#[inline]
2061#[target_feature(enable = "avx512vbmi2")]
2062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2063#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd
2064#[rustc_legacy_const_generics(2)]
2065pub unsafe fn _mm512_shrdi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
2066 static_assert_uimm_bits!(IMM8, 8);
2067 transmute(src:vpshrdvd(
2068 a:a.as_i32x16(),
2069 b:b.as_i32x16(),
2070 c:_mm512_set1_epi32(IMM8).as_i32x16(),
2071 ))
2072}
2073
2074/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2075///
2076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi32&expand=5103)
2077#[inline]
2078#[target_feature(enable = "avx512vbmi2")]
2079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2080#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd
2081#[rustc_legacy_const_generics(4)]
2082pub unsafe fn _mm512_mask_shrdi_epi32<const IMM8: i32>(
2083 src: __m512i,
2084 k: __mmask16,
2085 a: __m512i,
2086 b: __m512i,
2087) -> __m512i {
2088 static_assert_uimm_bits!(IMM8, 8);
2089 let shf: i32x16 = vpshrdvd(
2090 a:a.as_i32x16(),
2091 b:b.as_i32x16(),
2092 c:_mm512_set1_epi32(IMM8).as_i32x16(),
2093 );
2094 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x16()))
2095}
2096
2097/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2098///
2099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi32&expand=5104)
2100#[inline]
2101#[target_feature(enable = "avx512vbmi2")]
2102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2103#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd
2104#[rustc_legacy_const_generics(3)]
2105pub unsafe fn _mm512_maskz_shrdi_epi32<const IMM8: i32>(
2106 k: __mmask16,
2107 a: __m512i,
2108 b: __m512i,
2109) -> __m512i {
2110 static_assert_uimm_bits!(IMM8, 8);
2111 let shf: i32x16 = vpshrdvd(
2112 a:a.as_i32x16(),
2113 b:b.as_i32x16(),
2114 c:_mm512_set1_epi32(IMM8).as_i32x16(),
2115 );
2116 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
2117 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
2118}
2119
2120/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
2121///
2122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi32&expand=5102)
2123#[inline]
2124#[target_feature(enable = "avx512vbmi2,avx512vl")]
2125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2126#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd
2127#[rustc_legacy_const_generics(2)]
2128pub unsafe fn _mm256_shrdi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2129 static_assert_uimm_bits!(IMM8, 8);
2130 transmute(src:vpshrdvd256(
2131 a:a.as_i32x8(),
2132 b:b.as_i32x8(),
2133 c:_mm256_set1_epi32(IMM8).as_i32x8(),
2134 ))
2135}
2136
2137/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2138///
2139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi32&expand=5100)
2140#[inline]
2141#[target_feature(enable = "avx512vbmi2,avx512vl")]
2142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2143#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd
2144#[rustc_legacy_const_generics(4)]
2145pub unsafe fn _mm256_mask_shrdi_epi32<const IMM8: i32>(
2146 src: __m256i,
2147 k: __mmask8,
2148 a: __m256i,
2149 b: __m256i,
2150) -> __m256i {
2151 static_assert_uimm_bits!(IMM8, 8);
2152 let shf: i32x8 = vpshrdvd256(
2153 a:a.as_i32x8(),
2154 b:b.as_i32x8(),
2155 c:_mm256_set1_epi32(IMM8).as_i32x8(),
2156 );
2157 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x8()))
2158}
2159
2160/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2161///
2162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi32&expand=5101)
2163#[inline]
2164#[target_feature(enable = "avx512vbmi2,avx512vl")]
2165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2166#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd
2167#[rustc_legacy_const_generics(3)]
2168pub unsafe fn _mm256_maskz_shrdi_epi32<const IMM8: i32>(
2169 k: __mmask8,
2170 a: __m256i,
2171 b: __m256i,
2172) -> __m256i {
2173 static_assert_uimm_bits!(IMM8, 8);
2174 let shf: i32x8 = vpshrdvd256(
2175 a:a.as_i32x8(),
2176 b:b.as_i32x8(),
2177 c:_mm256_set1_epi32(IMM8).as_i32x8(),
2178 );
2179 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
2180 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
2181}
2182
2183/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
2184///
2185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi32&expand=5099)
2186#[inline]
2187#[target_feature(enable = "avx512vbmi2,avx512vl")]
2188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2189#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd
2190#[rustc_legacy_const_generics(2)]
2191pub unsafe fn _mm_shrdi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
2192 static_assert_uimm_bits!(IMM8, 8);
2193 transmute(src:vpshrdvd128(
2194 a:a.as_i32x4(),
2195 b:b.as_i32x4(),
2196 c:_mm_set1_epi32(IMM8).as_i32x4(),
2197 ))
2198}
2199
2200/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2201///
2202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi32&expand=5097)
2203#[inline]
2204#[target_feature(enable = "avx512vbmi2,avx512vl")]
2205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2206#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd
2207#[rustc_legacy_const_generics(4)]
2208pub unsafe fn _mm_mask_shrdi_epi32<const IMM8: i32>(
2209 src: __m128i,
2210 k: __mmask8,
2211 a: __m128i,
2212 b: __m128i,
2213) -> __m128i {
2214 static_assert_uimm_bits!(IMM8, 8);
2215 let shf: i32x4 = vpshrdvd128(a:a.as_i32x4(), b:b.as_i32x4(), c:_mm_set1_epi32(IMM8).as_i32x4());
2216 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x4()))
2217}
2218
2219/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2220///
2221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi32&expand=5098)
2222#[inline]
2223#[target_feature(enable = "avx512vbmi2,avx512vl")]
2224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2225#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd
2226#[rustc_legacy_const_generics(3)]
2227pub unsafe fn _mm_maskz_shrdi_epi32<const IMM8: i32>(
2228 k: __mmask8,
2229 a: __m128i,
2230 b: __m128i,
2231) -> __m128i {
2232 static_assert_uimm_bits!(IMM8, 8);
2233 let shf: i32x4 = vpshrdvd128(a:a.as_i32x4(), b:b.as_i32x4(), c:_mm_set1_epi32(IMM8).as_i32x4());
2234 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
2235 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
2236}
2237
2238/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
2239///
2240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi16&expand=5096)
2241#[inline]
2242#[target_feature(enable = "avx512vbmi2")]
2243#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2244#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2245#[rustc_legacy_const_generics(2)]
2246pub unsafe fn _mm512_shrdi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
2247 static_assert_uimm_bits!(IMM8, 8);
2248 let imm8: i16 = IMM8 as i16;
2249 assert!(matches!(imm8, 0..=255));
2250 transmute(src:vpshrdvw(
2251 a:a.as_i16x32(),
2252 b:b.as_i16x32(),
2253 c:_mm512_set1_epi16(imm8).as_i16x32(),
2254 ))
2255}
2256
2257/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2258///
2259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi16&expand=5094)
2260#[inline]
2261#[target_feature(enable = "avx512vbmi2")]
2262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2263#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2264#[rustc_legacy_const_generics(4)]
2265pub unsafe fn _mm512_mask_shrdi_epi16<const IMM8: i32>(
2266 src: __m512i,
2267 k: __mmask32,
2268 a: __m512i,
2269 b: __m512i,
2270) -> __m512i {
2271 static_assert_uimm_bits!(IMM8, 8);
2272 let imm8: i16 = IMM8 as i16;
2273 assert!(matches!(imm8, 0..=255));
2274 let shf: i16x32 = vpshrdvw(
2275 a:a.as_i16x32(),
2276 b:b.as_i16x32(),
2277 c:_mm512_set1_epi16(imm8).as_i16x32(),
2278 );
2279 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i16x32()))
2280}
2281
2282/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2283///
2284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi16&expand=5095)
2285#[inline]
2286#[target_feature(enable = "avx512vbmi2")]
2287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2288#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2289#[rustc_legacy_const_generics(3)]
2290pub unsafe fn _mm512_maskz_shrdi_epi16<const IMM8: i32>(
2291 k: __mmask32,
2292 a: __m512i,
2293 b: __m512i,
2294) -> __m512i {
2295 static_assert_uimm_bits!(IMM8, 8);
2296 let imm8: i16 = IMM8 as i16;
2297 assert!(matches!(imm8, 0..=255));
2298 let shf: i16x32 = vpshrdvw(
2299 a:a.as_i16x32(),
2300 b:b.as_i16x32(),
2301 c:_mm512_set1_epi16(imm8).as_i16x32(),
2302 );
2303 let zero: i16x32 = _mm512_setzero_si512().as_i16x32();
2304 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
2305}
2306
2307/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
2308///
2309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi16&expand=5093)
2310#[inline]
2311#[target_feature(enable = "avx512vbmi2,avx512vl")]
2312#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2313#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2314#[rustc_legacy_const_generics(2)]
2315pub unsafe fn _mm256_shrdi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2316 static_assert_uimm_bits!(IMM8, 8);
2317 let imm8: i16 = IMM8 as i16;
2318 assert!(matches!(imm8, 0..=255));
2319 transmute(src:vpshrdvw256(
2320 a:a.as_i16x16(),
2321 b:b.as_i16x16(),
2322 c:_mm256_set1_epi16(imm8).as_i16x16(),
2323 ))
2324}
2325
2326/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2327///
2328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi16&expand=5091)
2329#[inline]
2330#[target_feature(enable = "avx512vbmi2,avx512vl")]
2331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2332#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2333#[rustc_legacy_const_generics(4)]
2334pub unsafe fn _mm256_mask_shrdi_epi16<const IMM8: i32>(
2335 src: __m256i,
2336 k: __mmask16,
2337 a: __m256i,
2338 b: __m256i,
2339) -> __m256i {
2340 static_assert_uimm_bits!(IMM8, 8);
2341 let imm8: i16 = IMM8 as i16;
2342 assert!(matches!(imm8, 0..=255));
2343 let shf: i16x16 = vpshrdvw256(
2344 a:a.as_i16x16(),
2345 b:b.as_i16x16(),
2346 c:_mm256_set1_epi16(imm8).as_i16x16(),
2347 );
2348 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i16x16()))
2349}
2350
2351/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2352///
2353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi16&expand=5092)
2354#[inline]
2355#[target_feature(enable = "avx512vbmi2,avx512vl")]
2356#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2357#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2358#[rustc_legacy_const_generics(3)]
2359pub unsafe fn _mm256_maskz_shrdi_epi16<const IMM8: i32>(
2360 k: __mmask16,
2361 a: __m256i,
2362 b: __m256i,
2363) -> __m256i {
2364 static_assert_uimm_bits!(IMM8, 8);
2365 let imm8: i16 = IMM8 as i16;
2366 let shf: i16x16 = vpshrdvw256(
2367 a:a.as_i16x16(),
2368 b:b.as_i16x16(),
2369 c:_mm256_set1_epi16(imm8).as_i16x16(),
2370 );
2371 let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
2372 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
2373}
2374
2375/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
2376///
2377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi16&expand=5090)
2378#[inline]
2379#[target_feature(enable = "avx512vbmi2,avx512vl")]
2380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2381#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2382#[rustc_legacy_const_generics(2)]
2383pub unsafe fn _mm_shrdi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
2384 static_assert_uimm_bits!(IMM8, 8);
2385 let imm8: i16 = IMM8 as i16;
2386 transmute(src:vpshrdvw128(
2387 a:a.as_i16x8(),
2388 b:b.as_i16x8(),
2389 c:_mm_set1_epi16(imm8).as_i16x8(),
2390 ))
2391}
2392
2393/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2394///
2395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi16&expand=5088)
2396#[inline]
2397#[target_feature(enable = "avx512vbmi2,avx512vl")]
2398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2399#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2400#[rustc_legacy_const_generics(4)]
2401pub unsafe fn _mm_mask_shrdi_epi16<const IMM8: i32>(
2402 src: __m128i,
2403 k: __mmask8,
2404 a: __m128i,
2405 b: __m128i,
2406) -> __m128i {
2407 static_assert_uimm_bits!(IMM8, 8);
2408 let imm8: i16 = IMM8 as i16;
2409 let shf: i16x8 = vpshrdvw128(a:a.as_i16x8(), b:b.as_i16x8(), c:_mm_set1_epi16(imm8).as_i16x8());
2410 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i16x8()))
2411}
2412
2413/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2414///
2415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi16&expand=5089)
2416#[inline]
2417#[target_feature(enable = "avx512vbmi2,avx512vl")]
2418#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2419#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2420#[rustc_legacy_const_generics(3)]
2421pub unsafe fn _mm_maskz_shrdi_epi16<const IMM8: i32>(
2422 k: __mmask8,
2423 a: __m128i,
2424 b: __m128i,
2425) -> __m128i {
2426 static_assert_uimm_bits!(IMM8, 8);
2427 let imm8: i16 = IMM8 as i16;
2428 let shf: i16x8 = vpshrdvw128(a:a.as_i16x8(), b:b.as_i16x8(), c:_mm_set1_epi16(imm8).as_i16x8());
2429 let zero: i16x8 = _mm_setzero_si128().as_i16x8();
2430 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
2431}
2432
2433#[allow(improper_ctypes)]
2434extern "C" {
2435 #[link_name = "llvm.x86.avx512.mask.compress.store.w.512"]
2436 fn vcompressstorew(mem: *mut i8, data: i16x32, mask: u32);
2437 #[link_name = "llvm.x86.avx512.mask.compress.store.w.256"]
2438 fn vcompressstorew256(mem: *mut i8, data: i16x16, mask: u16);
2439 #[link_name = "llvm.x86.avx512.mask.compress.store.w.128"]
2440 fn vcompressstorew128(mem: *mut i8, data: i16x8, mask: u8);
2441
2442 #[link_name = "llvm.x86.avx512.mask.compress.store.b.512"]
2443 fn vcompressstoreb(mem: *mut i8, data: i8x64, mask: u64);
2444 #[link_name = "llvm.x86.avx512.mask.compress.store.b.256"]
2445 fn vcompressstoreb256(mem: *mut i8, data: i8x32, mask: u32);
2446 #[link_name = "llvm.x86.avx512.mask.compress.store.b.128"]
2447 fn vcompressstoreb128(mem: *mut i8, data: i8x16, mask: u16);
2448
2449 #[link_name = "llvm.x86.avx512.mask.compress.w.512"]
2450 fn vpcompressw(a: i16x32, src: i16x32, mask: u32) -> i16x32;
2451 #[link_name = "llvm.x86.avx512.mask.compress.w.256"]
2452 fn vpcompressw256(a: i16x16, src: i16x16, mask: u16) -> i16x16;
2453 #[link_name = "llvm.x86.avx512.mask.compress.w.128"]
2454 fn vpcompressw128(a: i16x8, src: i16x8, mask: u8) -> i16x8;
2455
2456 #[link_name = "llvm.x86.avx512.mask.compress.b.512"]
2457 fn vpcompressb(a: i8x64, src: i8x64, mask: u64) -> i8x64;
2458 #[link_name = "llvm.x86.avx512.mask.compress.b.256"]
2459 fn vpcompressb256(a: i8x32, src: i8x32, mask: u32) -> i8x32;
2460 #[link_name = "llvm.x86.avx512.mask.compress.b.128"]
2461 fn vpcompressb128(a: i8x16, src: i8x16, mask: u16) -> i8x16;
2462
2463 #[link_name = "llvm.x86.avx512.mask.expand.w.512"]
2464 fn vpexpandw(a: i16x32, src: i16x32, mask: u32) -> i16x32;
2465 #[link_name = "llvm.x86.avx512.mask.expand.w.256"]
2466 fn vpexpandw256(a: i16x16, src: i16x16, mask: u16) -> i16x16;
2467 #[link_name = "llvm.x86.avx512.mask.expand.w.128"]
2468 fn vpexpandw128(a: i16x8, src: i16x8, mask: u8) -> i16x8;
2469
2470 #[link_name = "llvm.x86.avx512.mask.expand.b.512"]
2471 fn vpexpandb(a: i8x64, src: i8x64, mask: u64) -> i8x64;
2472 #[link_name = "llvm.x86.avx512.mask.expand.b.256"]
2473 fn vpexpandb256(a: i8x32, src: i8x32, mask: u32) -> i8x32;
2474 #[link_name = "llvm.x86.avx512.mask.expand.b.128"]
2475 fn vpexpandb128(a: i8x16, src: i8x16, mask: u16) -> i8x16;
2476
2477 #[link_name = "llvm.fshl.v8i64"]
2478 fn vpshldvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8;
2479 #[link_name = "llvm.fshl.v4i64"]
2480 fn vpshldvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4;
2481 #[link_name = "llvm.fshl.v2i64"]
2482 fn vpshldvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2;
2483 #[link_name = "llvm.fshl.v16i32"]
2484 fn vpshldvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16;
2485 #[link_name = "llvm.fshl.v8i32"]
2486 fn vpshldvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8;
2487 #[link_name = "llvm.fshl.v4i32"]
2488 fn vpshldvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
2489 #[link_name = "llvm.fshl.v32i16"]
2490 fn vpshldvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32;
2491 #[link_name = "llvm.fshl.v16i16"]
2492 fn vpshldvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16;
2493 #[link_name = "llvm.fshl.v8i16"]
2494 fn vpshldvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8;
2495
2496 #[link_name = "llvm.fshr.v8i64"]
2497 fn vpshrdvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8;
2498 #[link_name = "llvm.fshr.v4i64"]
2499 fn vpshrdvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4;
2500 #[link_name = "llvm.fshr.v2i64"]
2501 fn vpshrdvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2;
2502 #[link_name = "llvm.fshr.v16i32"]
2503 fn vpshrdvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16;
2504 #[link_name = "llvm.fshr.v8i32"]
2505 fn vpshrdvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8;
2506 #[link_name = "llvm.fshr.v4i32"]
2507 fn vpshrdvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
2508 #[link_name = "llvm.fshr.v32i16"]
2509 fn vpshrdvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32;
2510 #[link_name = "llvm.fshr.v16i16"]
2511 fn vpshrdvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16;
2512 #[link_name = "llvm.fshr.v8i16"]
2513 fn vpshrdvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8;
2514}
2515
2516#[cfg(test)]
2517mod tests {
2518
2519 use stdarch_test::simd_test;
2520
2521 use crate::core_arch::x86::*;
2522 use crate::hint::black_box;
2523
2524 #[simd_test(enable = "avx512vbmi2")]
2525 unsafe fn test_mm512_mask_compress_epi16() {
2526 let src = _mm512_set1_epi16(200);
2527 #[rustfmt::skip]
2528 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2529 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2530 let r = _mm512_mask_compress_epi16(src, 0b01010101_01010101_01010101_01010101, a);
2531 #[rustfmt::skip]
2532 let e = _mm512_set_epi16(
2533 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200,
2534 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2535 );
2536 assert_eq_m512i(r, e);
2537 }
2538
2539 #[simd_test(enable = "avx512vbmi2")]
2540 unsafe fn test_mm512_maskz_compress_epi16() {
2541 #[rustfmt::skip]
2542 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2543 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2544 let r = _mm512_maskz_compress_epi16(0b01010101_01010101_01010101_01010101, a);
2545 #[rustfmt::skip]
2546 let e = _mm512_set_epi16(
2547 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2548 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2549 );
2550 assert_eq_m512i(r, e);
2551 }
2552
2553 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2554 unsafe fn test_mm256_mask_compress_epi16() {
2555 let src = _mm256_set1_epi16(200);
2556 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2557 let r = _mm256_mask_compress_epi16(src, 0b01010101_01010101, a);
2558 let e = _mm256_set_epi16(
2559 200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
2560 );
2561 assert_eq_m256i(r, e);
2562 }
2563
2564 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2565 unsafe fn test_mm256_maskz_compress_epi16() {
2566 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2567 let r = _mm256_maskz_compress_epi16(0b01010101_01010101, a);
2568 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
2569 assert_eq_m256i(r, e);
2570 }
2571
2572 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2573 unsafe fn test_mm_mask_compress_epi16() {
2574 let src = _mm_set1_epi16(200);
2575 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2576 let r = _mm_mask_compress_epi16(src, 0b01010101, a);
2577 let e = _mm_set_epi16(200, 200, 200, 200, 1, 3, 5, 7);
2578 assert_eq_m128i(r, e);
2579 }
2580
2581 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2582 unsafe fn test_mm_maskz_compress_epi16() {
2583 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2584 let r = _mm_maskz_compress_epi16(0b01010101, a);
2585 let e = _mm_set_epi16(0, 0, 0, 0, 1, 3, 5, 7);
2586 assert_eq_m128i(r, e);
2587 }
2588
2589 #[simd_test(enable = "avx512vbmi2")]
2590 unsafe fn test_mm512_mask_compress_epi8() {
2591 let src = _mm512_set1_epi8(100);
2592 #[rustfmt::skip]
2593 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2594 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2595 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2596 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2597 let r = _mm512_mask_compress_epi8(
2598 src,
2599 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2600 a,
2601 );
2602 #[rustfmt::skip]
2603 let e = _mm512_set_epi8(
2604 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2605 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2606 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2607 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
2608 );
2609 assert_eq_m512i(r, e);
2610 }
2611
2612 #[simd_test(enable = "avx512vbmi2")]
2613 unsafe fn test_mm512_maskz_compress_epi8() {
2614 #[rustfmt::skip]
2615 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2616 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2617 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2618 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2619 let r = _mm512_maskz_compress_epi8(
2620 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2621 a,
2622 );
2623 #[rustfmt::skip]
2624 let e = _mm512_set_epi8(
2625 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2626 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2627 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2628 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
2629 );
2630 assert_eq_m512i(r, e);
2631 }
2632
2633 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2634 unsafe fn test_mm256_mask_compress_epi8() {
2635 let src = _mm256_set1_epi8(100);
2636 #[rustfmt::skip]
2637 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2638 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2639 let r = _mm256_mask_compress_epi8(src, 0b01010101_01010101_01010101_01010101, a);
2640 #[rustfmt::skip]
2641 let e = _mm256_set_epi8(
2642 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2643 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2644 );
2645 assert_eq_m256i(r, e);
2646 }
2647
2648 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2649 unsafe fn test_mm256_maskz_compress_epi8() {
2650 #[rustfmt::skip]
2651 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2652 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2653 let r = _mm256_maskz_compress_epi8(0b01010101_01010101_01010101_01010101, a);
2654 #[rustfmt::skip]
2655 let e = _mm256_set_epi8(
2656 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2657 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2658 );
2659 assert_eq_m256i(r, e);
2660 }
2661
2662 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2663 unsafe fn test_mm_mask_compress_epi8() {
2664 let src = _mm_set1_epi8(100);
2665 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2666 let r = _mm_mask_compress_epi8(src, 0b01010101_01010101, a);
2667 let e = _mm_set_epi8(
2668 100, 100, 100, 100, 100, 100, 100, 100, 1, 3, 5, 7, 9, 11, 13, 15,
2669 );
2670 assert_eq_m128i(r, e);
2671 }
2672
2673 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2674 unsafe fn test_mm_maskz_compress_epi8() {
2675 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2676 let r = _mm_maskz_compress_epi8(0b01010101_01010101, a);
2677 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
2678 assert_eq_m128i(r, e);
2679 }
2680
2681 #[simd_test(enable = "avx512vbmi2")]
2682 unsafe fn test_mm512_mask_expand_epi16() {
2683 let src = _mm512_set1_epi16(200);
2684 #[rustfmt::skip]
2685 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2686 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2687 let r = _mm512_mask_expand_epi16(src, 0b01010101_01010101_01010101_01010101, a);
2688 #[rustfmt::skip]
2689 let e = _mm512_set_epi16(
2690 200, 16, 200, 17, 200, 18, 200, 19, 200, 20, 200, 21, 200, 22, 200, 23,
2691 200, 24, 200, 25, 200, 26, 200, 27, 200, 28, 200, 29, 200, 30, 200, 31,
2692 );
2693 assert_eq_m512i(r, e);
2694 }
2695
2696 #[simd_test(enable = "avx512vbmi2")]
2697 unsafe fn test_mm512_maskz_expand_epi16() {
2698 #[rustfmt::skip]
2699 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2700 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2701 let r = _mm512_maskz_expand_epi16(0b01010101_01010101_01010101_01010101, a);
2702 #[rustfmt::skip]
2703 let e = _mm512_set_epi16(0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23,
2704 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31);
2705 assert_eq_m512i(r, e);
2706 }
2707
2708 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2709 unsafe fn test_mm256_mask_expand_epi16() {
2710 let src = _mm256_set1_epi16(200);
2711 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2712 let r = _mm256_mask_expand_epi16(src, 0b01010101_01010101, a);
2713 let e = _mm256_set_epi16(
2714 200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
2715 );
2716 assert_eq_m256i(r, e);
2717 }
2718
2719 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2720 unsafe fn test_mm256_maskz_expand_epi16() {
2721 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2722 let r = _mm256_maskz_expand_epi16(0b01010101_01010101, a);
2723 let e = _mm256_set_epi16(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
2724 assert_eq_m256i(r, e);
2725 }
2726
2727 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2728 unsafe fn test_mm_mask_expand_epi16() {
2729 let src = _mm_set1_epi16(200);
2730 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2731 let r = _mm_mask_expand_epi16(src, 0b01010101, a);
2732 let e = _mm_set_epi16(200, 4, 200, 5, 200, 6, 200, 7);
2733 assert_eq_m128i(r, e);
2734 }
2735
2736 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2737 unsafe fn test_mm_maskz_expand_epi16() {
2738 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2739 let r = _mm_maskz_expand_epi16(0b01010101, a);
2740 let e = _mm_set_epi16(0, 4, 0, 5, 0, 6, 0, 7);
2741 assert_eq_m128i(r, e);
2742 }
2743
2744 #[simd_test(enable = "avx512vbmi2")]
2745 unsafe fn test_mm512_mask_expand_epi8() {
2746 let src = _mm512_set1_epi8(100);
2747 #[rustfmt::skip]
2748 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2749 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2750 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2751 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2752 let r = _mm512_mask_expand_epi8(
2753 src,
2754 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2755 a,
2756 );
2757 #[rustfmt::skip]
2758 let e = _mm512_set_epi8(
2759 100, 32, 100, 33, 100, 34, 100, 35, 100, 36, 100, 37, 100, 38, 100, 39,
2760 100, 40, 100, 41, 100, 42, 100, 43, 100, 44, 100, 45, 100, 46, 100, 47,
2761 100, 48, 100, 49, 100, 50, 100, 51, 100, 52, 100, 53, 100, 54, 100, 55,
2762 100, 56, 100, 57, 100, 58, 100, 59, 100, 60, 100, 61, 100, 62, 100, 63,
2763 );
2764 assert_eq_m512i(r, e);
2765 }
2766
2767 #[simd_test(enable = "avx512vbmi2")]
2768 unsafe fn test_mm512_maskz_expand_epi8() {
2769 #[rustfmt::skip]
2770 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2771 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2772 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2773 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2774 let r = _mm512_maskz_expand_epi8(
2775 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2776 a,
2777 );
2778 #[rustfmt::skip]
2779 let e = _mm512_set_epi8(
2780 0, 32, 0, 33, 0, 34, 0, 35, 0, 36, 0, 37, 0, 38, 0, 39,
2781 0, 40, 0, 41, 0, 42, 0, 43, 0, 44, 0, 45, 0, 46, 0, 47,
2782 0, 48, 0, 49, 0, 50, 0, 51, 0, 52, 0, 53, 0, 54, 0, 55,
2783 0, 56, 0, 57, 0, 58, 0, 59, 0, 60, 0, 61, 0, 62, 0, 63,
2784 );
2785 assert_eq_m512i(r, e);
2786 }
2787
2788 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2789 unsafe fn test_mm256_mask_expand_epi8() {
2790 let src = _mm256_set1_epi8(100);
2791 #[rustfmt::skip]
2792 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2793 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2794 let r = _mm256_mask_expand_epi8(src, 0b01010101_01010101_01010101_01010101, a);
2795 #[rustfmt::skip]
2796 let e = _mm256_set_epi8(
2797 100, 16, 100, 17, 100, 18, 100, 19, 100, 20, 100, 21, 100, 22, 100, 23,
2798 100, 24, 100, 25, 100, 26, 100, 27, 100, 28, 100, 29, 100, 30, 100, 31,
2799 );
2800 assert_eq_m256i(r, e);
2801 }
2802
2803 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2804 unsafe fn test_mm256_maskz_expand_epi8() {
2805 #[rustfmt::skip]
2806 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2807 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2808 let r = _mm256_maskz_expand_epi8(0b01010101_01010101_01010101_01010101, a);
2809 #[rustfmt::skip]
2810 let e = _mm256_set_epi8(
2811 0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23,
2812 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31,
2813 );
2814 assert_eq_m256i(r, e);
2815 }
2816
2817 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2818 unsafe fn test_mm_mask_expand_epi8() {
2819 let src = _mm_set1_epi8(100);
2820 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2821 let r = _mm_mask_expand_epi8(src, 0b01010101_01010101, a);
2822 let e = _mm_set_epi8(
2823 100, 8, 100, 9, 100, 10, 100, 11, 100, 12, 100, 13, 100, 14, 100, 15,
2824 );
2825 assert_eq_m128i(r, e);
2826 }
2827
2828 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2829 unsafe fn test_mm_maskz_expand_epi8() {
2830 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2831 let r = _mm_maskz_expand_epi8(0b01010101_01010101, a);
2832 let e = _mm_set_epi8(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
2833 assert_eq_m128i(r, e);
2834 }
2835
2836 #[simd_test(enable = "avx512vbmi2")]
2837 unsafe fn test_mm512_shldv_epi64() {
2838 let a = _mm512_set1_epi64(1);
2839 let b = _mm512_set1_epi64(1 << 63);
2840 let c = _mm512_set1_epi64(2);
2841 let r = _mm512_shldv_epi64(a, b, c);
2842 let e = _mm512_set1_epi64(6);
2843 assert_eq_m512i(r, e);
2844 }
2845
2846 #[simd_test(enable = "avx512vbmi2")]
2847 unsafe fn test_mm512_mask_shldv_epi64() {
2848 let a = _mm512_set1_epi64(1);
2849 let b = _mm512_set1_epi64(1 << 63);
2850 let c = _mm512_set1_epi64(2);
2851 let r = _mm512_mask_shldv_epi64(a, 0, b, c);
2852 assert_eq_m512i(r, a);
2853 let r = _mm512_mask_shldv_epi64(a, 0b11111111, b, c);
2854 let e = _mm512_set1_epi64(6);
2855 assert_eq_m512i(r, e);
2856 }
2857
2858 #[simd_test(enable = "avx512vbmi2")]
2859 unsafe fn test_mm512_maskz_shldv_epi64() {
2860 let a = _mm512_set1_epi64(1);
2861 let b = _mm512_set1_epi64(1 << 63);
2862 let c = _mm512_set1_epi64(2);
2863 let r = _mm512_maskz_shldv_epi64(0, a, b, c);
2864 assert_eq_m512i(r, _mm512_setzero_si512());
2865 let r = _mm512_maskz_shldv_epi64(0b11111111, a, b, c);
2866 let e = _mm512_set1_epi64(6);
2867 assert_eq_m512i(r, e);
2868 }
2869
2870 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2871 unsafe fn test_mm256_shldv_epi64() {
2872 let a = _mm256_set1_epi64x(1);
2873 let b = _mm256_set1_epi64x(1 << 63);
2874 let c = _mm256_set1_epi64x(2);
2875 let r = _mm256_shldv_epi64(a, b, c);
2876 let e = _mm256_set1_epi64x(6);
2877 assert_eq_m256i(r, e);
2878 }
2879
2880 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2881 unsafe fn test_mm256_mask_shldv_epi64() {
2882 let a = _mm256_set1_epi64x(1);
2883 let b = _mm256_set1_epi64x(1 << 63);
2884 let c = _mm256_set1_epi64x(2);
2885 let r = _mm256_mask_shldv_epi64(a, 0, b, c);
2886 assert_eq_m256i(r, a);
2887 let r = _mm256_mask_shldv_epi64(a, 0b00001111, b, c);
2888 let e = _mm256_set1_epi64x(6);
2889 assert_eq_m256i(r, e);
2890 }
2891
2892 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2893 unsafe fn test_mm256_maskz_shldv_epi64() {
2894 let a = _mm256_set1_epi64x(1);
2895 let b = _mm256_set1_epi64x(1 << 63);
2896 let c = _mm256_set1_epi64x(2);
2897 let r = _mm256_maskz_shldv_epi64(0, a, b, c);
2898 assert_eq_m256i(r, _mm256_setzero_si256());
2899 let r = _mm256_maskz_shldv_epi64(0b00001111, a, b, c);
2900 let e = _mm256_set1_epi64x(6);
2901 assert_eq_m256i(r, e);
2902 }
2903
2904 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2905 unsafe fn test_mm_shldv_epi64() {
2906 let a = _mm_set1_epi64x(1);
2907 let b = _mm_set1_epi64x(1 << 63);
2908 let c = _mm_set1_epi64x(2);
2909 let r = _mm_shldv_epi64(a, b, c);
2910 let e = _mm_set1_epi64x(6);
2911 assert_eq_m128i(r, e);
2912 }
2913
2914 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2915 unsafe fn test_mm_mask_shldv_epi64() {
2916 let a = _mm_set1_epi64x(1);
2917 let b = _mm_set1_epi64x(1 << 63);
2918 let c = _mm_set1_epi64x(2);
2919 let r = _mm_mask_shldv_epi64(a, 0, b, c);
2920 assert_eq_m128i(r, a);
2921 let r = _mm_mask_shldv_epi64(a, 0b00000011, b, c);
2922 let e = _mm_set1_epi64x(6);
2923 assert_eq_m128i(r, e);
2924 }
2925
2926 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2927 unsafe fn test_mm_maskz_shldv_epi64() {
2928 let a = _mm_set1_epi64x(1);
2929 let b = _mm_set1_epi64x(1 << 63);
2930 let c = _mm_set1_epi64x(2);
2931 let r = _mm_maskz_shldv_epi64(0, a, b, c);
2932 assert_eq_m128i(r, _mm_setzero_si128());
2933 let r = _mm_maskz_shldv_epi64(0b00000011, a, b, c);
2934 let e = _mm_set1_epi64x(6);
2935 assert_eq_m128i(r, e);
2936 }
2937
2938 #[simd_test(enable = "avx512vbmi2")]
2939 unsafe fn test_mm512_shldv_epi32() {
2940 let a = _mm512_set1_epi32(1);
2941 let b = _mm512_set1_epi32(1 << 31);
2942 let c = _mm512_set1_epi32(2);
2943 let r = _mm512_shldv_epi32(a, b, c);
2944 let e = _mm512_set1_epi32(6);
2945 assert_eq_m512i(r, e);
2946 }
2947
2948 #[simd_test(enable = "avx512vbmi2")]
2949 unsafe fn test_mm512_mask_shldv_epi32() {
2950 let a = _mm512_set1_epi32(1);
2951 let b = _mm512_set1_epi32(1 << 31);
2952 let c = _mm512_set1_epi32(2);
2953 let r = _mm512_mask_shldv_epi32(a, 0, b, c);
2954 assert_eq_m512i(r, a);
2955 let r = _mm512_mask_shldv_epi32(a, 0b11111111_11111111, b, c);
2956 let e = _mm512_set1_epi32(6);
2957 assert_eq_m512i(r, e);
2958 }
2959
2960 #[simd_test(enable = "avx512vbmi2")]
2961 unsafe fn test_mm512_maskz_shldv_epi32() {
2962 let a = _mm512_set1_epi32(1);
2963 let b = _mm512_set1_epi32(1 << 31);
2964 let c = _mm512_set1_epi32(2);
2965 let r = _mm512_maskz_shldv_epi32(0, a, b, c);
2966 assert_eq_m512i(r, _mm512_setzero_si512());
2967 let r = _mm512_maskz_shldv_epi32(0b11111111_11111111, a, b, c);
2968 let e = _mm512_set1_epi32(6);
2969 assert_eq_m512i(r, e);
2970 }
2971
2972 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2973 unsafe fn test_mm256_shldv_epi32() {
2974 let a = _mm256_set1_epi32(1);
2975 let b = _mm256_set1_epi32(1 << 31);
2976 let c = _mm256_set1_epi32(2);
2977 let r = _mm256_shldv_epi32(a, b, c);
2978 let e = _mm256_set1_epi32(6);
2979 assert_eq_m256i(r, e);
2980 }
2981
2982 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2983 unsafe fn test_mm256_mask_shldv_epi32() {
2984 let a = _mm256_set1_epi32(1);
2985 let b = _mm256_set1_epi32(1 << 31);
2986 let c = _mm256_set1_epi32(2);
2987 let r = _mm256_mask_shldv_epi32(a, 0, b, c);
2988 assert_eq_m256i(r, a);
2989 let r = _mm256_mask_shldv_epi32(a, 0b11111111, b, c);
2990 let e = _mm256_set1_epi32(6);
2991 assert_eq_m256i(r, e);
2992 }
2993
2994 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2995 unsafe fn test_mm256_maskz_shldv_epi32() {
2996 let a = _mm256_set1_epi32(1);
2997 let b = _mm256_set1_epi32(1 << 31);
2998 let c = _mm256_set1_epi32(2);
2999 let r = _mm256_maskz_shldv_epi32(0, a, b, c);
3000 assert_eq_m256i(r, _mm256_setzero_si256());
3001 let r = _mm256_maskz_shldv_epi32(0b11111111, a, b, c);
3002 let e = _mm256_set1_epi32(6);
3003 assert_eq_m256i(r, e);
3004 }
3005
3006 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3007 unsafe fn test_mm_shldv_epi32() {
3008 let a = _mm_set1_epi32(1);
3009 let b = _mm_set1_epi32(1 << 31);
3010 let c = _mm_set1_epi32(2);
3011 let r = _mm_shldv_epi32(a, b, c);
3012 let e = _mm_set1_epi32(6);
3013 assert_eq_m128i(r, e);
3014 }
3015
3016 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3017 unsafe fn test_mm_mask_shldv_epi32() {
3018 let a = _mm_set1_epi32(1);
3019 let b = _mm_set1_epi32(1 << 31);
3020 let c = _mm_set1_epi32(2);
3021 let r = _mm_mask_shldv_epi32(a, 0, b, c);
3022 assert_eq_m128i(r, a);
3023 let r = _mm_mask_shldv_epi32(a, 0b00001111, b, c);
3024 let e = _mm_set1_epi32(6);
3025 assert_eq_m128i(r, e);
3026 }
3027
3028 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3029 unsafe fn test_mm_maskz_shldv_epi32() {
3030 let a = _mm_set1_epi32(1);
3031 let b = _mm_set1_epi32(1 << 31);
3032 let c = _mm_set1_epi32(2);
3033 let r = _mm_maskz_shldv_epi32(0, a, b, c);
3034 assert_eq_m128i(r, _mm_setzero_si128());
3035 let r = _mm_maskz_shldv_epi32(0b00001111, a, b, c);
3036 let e = _mm_set1_epi32(6);
3037 assert_eq_m128i(r, e);
3038 }
3039
3040 #[simd_test(enable = "avx512vbmi2")]
3041 unsafe fn test_mm512_shldv_epi16() {
3042 let a = _mm512_set1_epi16(1);
3043 let b = _mm512_set1_epi16(1 << 15);
3044 let c = _mm512_set1_epi16(2);
3045 let r = _mm512_shldv_epi16(a, b, c);
3046 let e = _mm512_set1_epi16(6);
3047 assert_eq_m512i(r, e);
3048 }
3049
3050 #[simd_test(enable = "avx512vbmi2")]
3051 unsafe fn test_mm512_mask_shldv_epi16() {
3052 let a = _mm512_set1_epi16(1);
3053 let b = _mm512_set1_epi16(1 << 15);
3054 let c = _mm512_set1_epi16(2);
3055 let r = _mm512_mask_shldv_epi16(a, 0, b, c);
3056 assert_eq_m512i(r, a);
3057 let r = _mm512_mask_shldv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c);
3058 let e = _mm512_set1_epi16(6);
3059 assert_eq_m512i(r, e);
3060 }
3061
3062 #[simd_test(enable = "avx512vbmi2")]
3063 unsafe fn test_mm512_maskz_shldv_epi16() {
3064 let a = _mm512_set1_epi16(1);
3065 let b = _mm512_set1_epi16(1 << 15);
3066 let c = _mm512_set1_epi16(2);
3067 let r = _mm512_maskz_shldv_epi16(0, a, b, c);
3068 assert_eq_m512i(r, _mm512_setzero_si512());
3069 let r = _mm512_maskz_shldv_epi16(0b11111111_11111111_11111111_11111111, a, b, c);
3070 let e = _mm512_set1_epi16(6);
3071 assert_eq_m512i(r, e);
3072 }
3073
3074 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3075 unsafe fn test_mm256_shldv_epi16() {
3076 let a = _mm256_set1_epi16(1);
3077 let b = _mm256_set1_epi16(1 << 15);
3078 let c = _mm256_set1_epi16(2);
3079 let r = _mm256_shldv_epi16(a, b, c);
3080 let e = _mm256_set1_epi16(6);
3081 assert_eq_m256i(r, e);
3082 }
3083
3084 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3085 unsafe fn test_mm256_mask_shldv_epi16() {
3086 let a = _mm256_set1_epi16(1);
3087 let b = _mm256_set1_epi16(1 << 15);
3088 let c = _mm256_set1_epi16(2);
3089 let r = _mm256_mask_shldv_epi16(a, 0, b, c);
3090 assert_eq_m256i(r, a);
3091 let r = _mm256_mask_shldv_epi16(a, 0b11111111_11111111, b, c);
3092 let e = _mm256_set1_epi16(6);
3093 assert_eq_m256i(r, e);
3094 }
3095
3096 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3097 unsafe fn test_mm256_maskz_shldv_epi16() {
3098 let a = _mm256_set1_epi16(1);
3099 let b = _mm256_set1_epi16(1 << 15);
3100 let c = _mm256_set1_epi16(2);
3101 let r = _mm256_maskz_shldv_epi16(0, a, b, c);
3102 assert_eq_m256i(r, _mm256_setzero_si256());
3103 let r = _mm256_maskz_shldv_epi16(0b11111111_11111111, a, b, c);
3104 let e = _mm256_set1_epi16(6);
3105 assert_eq_m256i(r, e);
3106 }
3107
3108 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3109 unsafe fn test_mm_shldv_epi16() {
3110 let a = _mm_set1_epi16(1);
3111 let b = _mm_set1_epi16(1 << 15);
3112 let c = _mm_set1_epi16(2);
3113 let r = _mm_shldv_epi16(a, b, c);
3114 let e = _mm_set1_epi16(6);
3115 assert_eq_m128i(r, e);
3116 }
3117
3118 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3119 unsafe fn test_mm_mask_shldv_epi16() {
3120 let a = _mm_set1_epi16(1);
3121 let b = _mm_set1_epi16(1 << 15);
3122 let c = _mm_set1_epi16(2);
3123 let r = _mm_mask_shldv_epi16(a, 0, b, c);
3124 assert_eq_m128i(r, a);
3125 let r = _mm_mask_shldv_epi16(a, 0b11111111, b, c);
3126 let e = _mm_set1_epi16(6);
3127 assert_eq_m128i(r, e);
3128 }
3129
3130 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3131 unsafe fn test_mm_maskz_shldv_epi16() {
3132 let a = _mm_set1_epi16(1);
3133 let b = _mm_set1_epi16(1 << 15);
3134 let c = _mm_set1_epi16(2);
3135 let r = _mm_maskz_shldv_epi16(0, a, b, c);
3136 assert_eq_m128i(r, _mm_setzero_si128());
3137 let r = _mm_maskz_shldv_epi16(0b11111111, a, b, c);
3138 let e = _mm_set1_epi16(6);
3139 assert_eq_m128i(r, e);
3140 }
3141
3142 #[simd_test(enable = "avx512vbmi2")]
3143 unsafe fn test_mm512_shrdv_epi64() {
3144 let a = _mm512_set1_epi64(8);
3145 let b = _mm512_set1_epi64(2);
3146 let c = _mm512_set1_epi64(1);
3147 let r = _mm512_shrdv_epi64(a, b, c);
3148 let e = _mm512_set1_epi64(1);
3149 assert_eq_m512i(r, e);
3150 }
3151
3152 #[simd_test(enable = "avx512vbmi2")]
3153 unsafe fn test_mm512_mask_shrdv_epi64() {
3154 let a = _mm512_set1_epi64(8);
3155 let b = _mm512_set1_epi64(2);
3156 let c = _mm512_set1_epi64(1);
3157 let r = _mm512_mask_shrdv_epi64(a, 0, b, c);
3158 assert_eq_m512i(r, a);
3159 let r = _mm512_mask_shrdv_epi64(a, 0b11111111, b, c);
3160 let e = _mm512_set1_epi64(1);
3161 assert_eq_m512i(r, e);
3162 }
3163
3164 #[simd_test(enable = "avx512vbmi2")]
3165 unsafe fn test_mm512_maskz_shrdv_epi64() {
3166 let a = _mm512_set1_epi64(8);
3167 let b = _mm512_set1_epi64(2);
3168 let c = _mm512_set1_epi64(1);
3169 let r = _mm512_maskz_shrdv_epi64(0, a, b, c);
3170 assert_eq_m512i(r, _mm512_setzero_si512());
3171 let r = _mm512_maskz_shrdv_epi64(0b11111111, a, b, c);
3172 let e = _mm512_set1_epi64(1);
3173 assert_eq_m512i(r, e);
3174 }
3175
3176 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3177 unsafe fn test_mm256_shrdv_epi64() {
3178 let a = _mm256_set1_epi64x(8);
3179 let b = _mm256_set1_epi64x(2);
3180 let c = _mm256_set1_epi64x(1);
3181 let r = _mm256_shrdv_epi64(a, b, c);
3182 let e = _mm256_set1_epi64x(1);
3183 assert_eq_m256i(r, e);
3184 }
3185
3186 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3187 unsafe fn test_mm256_mask_shrdv_epi64() {
3188 let a = _mm256_set1_epi64x(8);
3189 let b = _mm256_set1_epi64x(2);
3190 let c = _mm256_set1_epi64x(1);
3191 let r = _mm256_mask_shrdv_epi64(a, 0, b, c);
3192 assert_eq_m256i(r, a);
3193 let r = _mm256_mask_shrdv_epi64(a, 0b00001111, b, c);
3194 let e = _mm256_set1_epi64x(1);
3195 assert_eq_m256i(r, e);
3196 }
3197
3198 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3199 unsafe fn test_mm256_maskz_shrdv_epi64() {
3200 let a = _mm256_set1_epi64x(8);
3201 let b = _mm256_set1_epi64x(2);
3202 let c = _mm256_set1_epi64x(1);
3203 let r = _mm256_maskz_shrdv_epi64(0, a, b, c);
3204 assert_eq_m256i(r, _mm256_setzero_si256());
3205 let r = _mm256_maskz_shrdv_epi64(0b00001111, a, b, c);
3206 let e = _mm256_set1_epi64x(1);
3207 assert_eq_m256i(r, e);
3208 }
3209
3210 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3211 unsafe fn test_mm_shrdv_epi64() {
3212 let a = _mm_set1_epi64x(8);
3213 let b = _mm_set1_epi64x(2);
3214 let c = _mm_set1_epi64x(1);
3215 let r = _mm_shrdv_epi64(a, b, c);
3216 let e = _mm_set1_epi64x(1);
3217 assert_eq_m128i(r, e);
3218 }
3219
3220 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3221 unsafe fn test_mm_mask_shrdv_epi64() {
3222 let a = _mm_set1_epi64x(8);
3223 let b = _mm_set1_epi64x(2);
3224 let c = _mm_set1_epi64x(1);
3225 let r = _mm_mask_shrdv_epi64(a, 0, b, c);
3226 assert_eq_m128i(r, a);
3227 let r = _mm_mask_shrdv_epi64(a, 0b00000011, b, c);
3228 let e = _mm_set1_epi64x(1);
3229 assert_eq_m128i(r, e);
3230 }
3231
3232 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3233 unsafe fn test_mm_maskz_shrdv_epi64() {
3234 let a = _mm_set1_epi64x(8);
3235 let b = _mm_set1_epi64x(2);
3236 let c = _mm_set1_epi64x(1);
3237 let r = _mm_maskz_shrdv_epi64(0, a, b, c);
3238 assert_eq_m128i(r, _mm_setzero_si128());
3239 let r = _mm_maskz_shrdv_epi64(0b00000011, a, b, c);
3240 let e = _mm_set1_epi64x(1);
3241 assert_eq_m128i(r, e);
3242 }
3243
3244 #[simd_test(enable = "avx512vbmi2")]
3245 unsafe fn test_mm512_shrdv_epi32() {
3246 let a = _mm512_set1_epi32(8);
3247 let b = _mm512_set1_epi32(2);
3248 let c = _mm512_set1_epi32(1);
3249 let r = _mm512_shrdv_epi32(a, b, c);
3250 let e = _mm512_set1_epi32(1);
3251 assert_eq_m512i(r, e);
3252 }
3253
3254 #[simd_test(enable = "avx512vbmi2")]
3255 unsafe fn test_mm512_mask_shrdv_epi32() {
3256 let a = _mm512_set1_epi32(8);
3257 let b = _mm512_set1_epi32(2);
3258 let c = _mm512_set1_epi32(1);
3259 let r = _mm512_mask_shrdv_epi32(a, 0, b, c);
3260 assert_eq_m512i(r, a);
3261 let r = _mm512_mask_shrdv_epi32(a, 0b11111111_11111111, b, c);
3262 let e = _mm512_set1_epi32(1);
3263 assert_eq_m512i(r, e);
3264 }
3265
3266 #[simd_test(enable = "avx512vbmi2")]
3267 unsafe fn test_mm512_maskz_shrdv_epi32() {
3268 let a = _mm512_set1_epi32(8);
3269 let b = _mm512_set1_epi32(2);
3270 let c = _mm512_set1_epi32(1);
3271 let r = _mm512_maskz_shrdv_epi32(0, a, b, c);
3272 assert_eq_m512i(r, _mm512_setzero_si512());
3273 let r = _mm512_maskz_shrdv_epi32(0b11111111_11111111, a, b, c);
3274 let e = _mm512_set1_epi32(1);
3275 assert_eq_m512i(r, e);
3276 }
3277
3278 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3279 unsafe fn test_mm256_shrdv_epi32() {
3280 let a = _mm256_set1_epi32(8);
3281 let b = _mm256_set1_epi32(2);
3282 let c = _mm256_set1_epi32(1);
3283 let r = _mm256_shrdv_epi32(a, b, c);
3284 let e = _mm256_set1_epi32(1);
3285 assert_eq_m256i(r, e);
3286 }
3287
3288 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3289 unsafe fn test_mm256_mask_shrdv_epi32() {
3290 let a = _mm256_set1_epi32(8);
3291 let b = _mm256_set1_epi32(2);
3292 let c = _mm256_set1_epi32(1);
3293 let r = _mm256_mask_shrdv_epi32(a, 0, b, c);
3294 assert_eq_m256i(r, a);
3295 let r = _mm256_mask_shrdv_epi32(a, 0b11111111, b, c);
3296 let e = _mm256_set1_epi32(1);
3297 assert_eq_m256i(r, e);
3298 }
3299
3300 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3301 unsafe fn test_mm256_maskz_shrdv_epi32() {
3302 let a = _mm256_set1_epi32(8);
3303 let b = _mm256_set1_epi32(2);
3304 let c = _mm256_set1_epi32(1);
3305 let r = _mm256_maskz_shrdv_epi32(0, a, b, c);
3306 assert_eq_m256i(r, _mm256_setzero_si256());
3307 let r = _mm256_maskz_shrdv_epi32(0b11111111, a, b, c);
3308 let e = _mm256_set1_epi32(1);
3309 assert_eq_m256i(r, e);
3310 }
3311
3312 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3313 unsafe fn test_mm_shrdv_epi32() {
3314 let a = _mm_set1_epi32(8);
3315 let b = _mm_set1_epi32(2);
3316 let c = _mm_set1_epi32(1);
3317 let r = _mm_shrdv_epi32(a, b, c);
3318 let e = _mm_set1_epi32(1);
3319 assert_eq_m128i(r, e);
3320 }
3321
3322 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3323 unsafe fn test_mm_mask_shrdv_epi32() {
3324 let a = _mm_set1_epi32(8);
3325 let b = _mm_set1_epi32(2);
3326 let c = _mm_set1_epi32(1);
3327 let r = _mm_mask_shrdv_epi32(a, 0, b, c);
3328 assert_eq_m128i(r, a);
3329 let r = _mm_mask_shrdv_epi32(a, 0b00001111, b, c);
3330 let e = _mm_set1_epi32(1);
3331 assert_eq_m128i(r, e);
3332 }
3333
3334 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3335 unsafe fn test_mm_maskz_shrdv_epi32() {
3336 let a = _mm_set1_epi32(8);
3337 let b = _mm_set1_epi32(2);
3338 let c = _mm_set1_epi32(1);
3339 let r = _mm_maskz_shrdv_epi32(0, a, b, c);
3340 assert_eq_m128i(r, _mm_setzero_si128());
3341 let r = _mm_maskz_shrdv_epi32(0b00001111, a, b, c);
3342 let e = _mm_set1_epi32(1);
3343 assert_eq_m128i(r, e);
3344 }
3345
3346 #[simd_test(enable = "avx512vbmi2")]
3347 unsafe fn test_mm512_shrdv_epi16() {
3348 let a = _mm512_set1_epi16(8);
3349 let b = _mm512_set1_epi16(2);
3350 let c = _mm512_set1_epi16(1);
3351 let r = _mm512_shrdv_epi16(a, b, c);
3352 let e = _mm512_set1_epi16(1);
3353 assert_eq_m512i(r, e);
3354 }
3355
3356 #[simd_test(enable = "avx512vbmi2")]
3357 unsafe fn test_mm512_mask_shrdv_epi16() {
3358 let a = _mm512_set1_epi16(8);
3359 let b = _mm512_set1_epi16(2);
3360 let c = _mm512_set1_epi16(1);
3361 let r = _mm512_mask_shrdv_epi16(a, 0, b, c);
3362 assert_eq_m512i(r, a);
3363 let r = _mm512_mask_shrdv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c);
3364 let e = _mm512_set1_epi16(1);
3365 assert_eq_m512i(r, e);
3366 }
3367
3368 #[simd_test(enable = "avx512vbmi2")]
3369 unsafe fn test_mm512_maskz_shrdv_epi16() {
3370 let a = _mm512_set1_epi16(8);
3371 let b = _mm512_set1_epi16(2);
3372 let c = _mm512_set1_epi16(1);
3373 let r = _mm512_maskz_shrdv_epi16(0, a, b, c);
3374 assert_eq_m512i(r, _mm512_setzero_si512());
3375 let r = _mm512_maskz_shrdv_epi16(0b11111111_11111111_11111111_11111111, a, b, c);
3376 let e = _mm512_set1_epi16(1);
3377 assert_eq_m512i(r, e);
3378 }
3379
3380 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3381 unsafe fn test_mm256_shrdv_epi16() {
3382 let a = _mm256_set1_epi16(8);
3383 let b = _mm256_set1_epi16(2);
3384 let c = _mm256_set1_epi16(1);
3385 let r = _mm256_shrdv_epi16(a, b, c);
3386 let e = _mm256_set1_epi16(1);
3387 assert_eq_m256i(r, e);
3388 }
3389
3390 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3391 unsafe fn test_mm256_mask_shrdv_epi16() {
3392 let a = _mm256_set1_epi16(8);
3393 let b = _mm256_set1_epi16(2);
3394 let c = _mm256_set1_epi16(1);
3395 let r = _mm256_mask_shrdv_epi16(a, 0, b, c);
3396 assert_eq_m256i(r, a);
3397 let r = _mm256_mask_shrdv_epi16(a, 0b11111111_11111111, b, c);
3398 let e = _mm256_set1_epi16(1);
3399 assert_eq_m256i(r, e);
3400 }
3401
3402 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3403 unsafe fn test_mm256_maskz_shrdv_epi16() {
3404 let a = _mm256_set1_epi16(8);
3405 let b = _mm256_set1_epi16(2);
3406 let c = _mm256_set1_epi16(1);
3407 let r = _mm256_maskz_shrdv_epi16(0, a, b, c);
3408 assert_eq_m256i(r, _mm256_setzero_si256());
3409 let r = _mm256_maskz_shrdv_epi16(0b11111111_11111111, a, b, c);
3410 let e = _mm256_set1_epi16(1);
3411 assert_eq_m256i(r, e);
3412 }
3413
3414 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3415 unsafe fn test_mm_shrdv_epi16() {
3416 let a = _mm_set1_epi16(8);
3417 let b = _mm_set1_epi16(2);
3418 let c = _mm_set1_epi16(1);
3419 let r = _mm_shrdv_epi16(a, b, c);
3420 let e = _mm_set1_epi16(1);
3421 assert_eq_m128i(r, e);
3422 }
3423
3424 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3425 unsafe fn test_mm_mask_shrdv_epi16() {
3426 let a = _mm_set1_epi16(8);
3427 let b = _mm_set1_epi16(2);
3428 let c = _mm_set1_epi16(1);
3429 let r = _mm_mask_shrdv_epi16(a, 0, b, c);
3430 assert_eq_m128i(r, a);
3431 let r = _mm_mask_shrdv_epi16(a, 0b11111111, b, c);
3432 let e = _mm_set1_epi16(1);
3433 assert_eq_m128i(r, e);
3434 }
3435
3436 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3437 unsafe fn test_mm_maskz_shrdv_epi16() {
3438 let a = _mm_set1_epi16(8);
3439 let b = _mm_set1_epi16(2);
3440 let c = _mm_set1_epi16(1);
3441 let r = _mm_maskz_shrdv_epi16(0, a, b, c);
3442 assert_eq_m128i(r, _mm_setzero_si128());
3443 let r = _mm_maskz_shrdv_epi16(0b11111111, a, b, c);
3444 let e = _mm_set1_epi16(1);
3445 assert_eq_m128i(r, e);
3446 }
3447
3448 #[simd_test(enable = "avx512vbmi2")]
3449 unsafe fn test_mm512_shldi_epi64() {
3450 let a = _mm512_set1_epi64(1);
3451 let b = _mm512_set1_epi64(1 << 63);
3452 let r = _mm512_shldi_epi64::<2>(a, b);
3453 let e = _mm512_set1_epi64(6);
3454 assert_eq_m512i(r, e);
3455 }
3456
3457 #[simd_test(enable = "avx512vbmi2")]
3458 unsafe fn test_mm512_mask_shldi_epi64() {
3459 let a = _mm512_set1_epi64(1);
3460 let b = _mm512_set1_epi64(1 << 63);
3461 let r = _mm512_mask_shldi_epi64::<2>(a, 0, a, b);
3462 assert_eq_m512i(r, a);
3463 let r = _mm512_mask_shldi_epi64::<2>(a, 0b11111111, a, b);
3464 let e = _mm512_set1_epi64(6);
3465 assert_eq_m512i(r, e);
3466 }
3467
3468 #[simd_test(enable = "avx512vbmi2")]
3469 unsafe fn test_mm512_maskz_shldi_epi64() {
3470 let a = _mm512_set1_epi64(1);
3471 let b = _mm512_set1_epi64(1 << 63);
3472 let r = _mm512_maskz_shldi_epi64::<2>(0, a, b);
3473 assert_eq_m512i(r, _mm512_setzero_si512());
3474 let r = _mm512_maskz_shldi_epi64::<2>(0b11111111, a, b);
3475 let e = _mm512_set1_epi64(6);
3476 assert_eq_m512i(r, e);
3477 }
3478
3479 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3480 unsafe fn test_mm256_shldi_epi64() {
3481 let a = _mm256_set1_epi64x(1);
3482 let b = _mm256_set1_epi64x(1 << 63);
3483 let r = _mm256_shldi_epi64::<2>(a, b);
3484 let e = _mm256_set1_epi64x(6);
3485 assert_eq_m256i(r, e);
3486 }
3487
3488 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3489 unsafe fn test_mm256_mask_shldi_epi64() {
3490 let a = _mm256_set1_epi64x(1);
3491 let b = _mm256_set1_epi64x(1 << 63);
3492 let r = _mm256_mask_shldi_epi64::<2>(a, 0, a, b);
3493 assert_eq_m256i(r, a);
3494 let r = _mm256_mask_shldi_epi64::<2>(a, 0b00001111, a, b);
3495 let e = _mm256_set1_epi64x(6);
3496 assert_eq_m256i(r, e);
3497 }
3498
3499 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3500 unsafe fn test_mm256_maskz_shldi_epi64() {
3501 let a = _mm256_set1_epi64x(1);
3502 let b = _mm256_set1_epi64x(1 << 63);
3503 let r = _mm256_maskz_shldi_epi64::<2>(0, a, b);
3504 assert_eq_m256i(r, _mm256_setzero_si256());
3505 let r = _mm256_maskz_shldi_epi64::<2>(0b00001111, a, b);
3506 let e = _mm256_set1_epi64x(6);
3507 assert_eq_m256i(r, e);
3508 }
3509
3510 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3511 unsafe fn test_mm_shldi_epi64() {
3512 let a = _mm_set1_epi64x(1);
3513 let b = _mm_set1_epi64x(1 << 63);
3514 let r = _mm_shldi_epi64::<2>(a, b);
3515 let e = _mm_set1_epi64x(6);
3516 assert_eq_m128i(r, e);
3517 }
3518
3519 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3520 unsafe fn test_mm_mask_shldi_epi64() {
3521 let a = _mm_set1_epi64x(1);
3522 let b = _mm_set1_epi64x(1 << 63);
3523 let r = _mm_mask_shldi_epi64::<2>(a, 0, a, b);
3524 assert_eq_m128i(r, a);
3525 let r = _mm_mask_shldi_epi64::<2>(a, 0b00000011, a, b);
3526 let e = _mm_set1_epi64x(6);
3527 assert_eq_m128i(r, e);
3528 }
3529
3530 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3531 unsafe fn test_mm_maskz_shldi_epi64() {
3532 let a = _mm_set1_epi64x(1);
3533 let b = _mm_set1_epi64x(1 << 63);
3534 let r = _mm_maskz_shldi_epi64::<2>(0, a, b);
3535 assert_eq_m128i(r, _mm_setzero_si128());
3536 let r = _mm_maskz_shldi_epi64::<2>(0b00000011, a, b);
3537 let e = _mm_set1_epi64x(6);
3538 assert_eq_m128i(r, e);
3539 }
3540
3541 #[simd_test(enable = "avx512vbmi2")]
3542 unsafe fn test_mm512_shldi_epi32() {
3543 let a = _mm512_set1_epi32(1);
3544 let b = _mm512_set1_epi32(1 << 31);
3545 let r = _mm512_shldi_epi32::<2>(a, b);
3546 let e = _mm512_set1_epi32(6);
3547 assert_eq_m512i(r, e);
3548 }
3549
3550 #[simd_test(enable = "avx512vbmi2")]
3551 unsafe fn test_mm512_mask_shldi_epi32() {
3552 let a = _mm512_set1_epi32(1);
3553 let b = _mm512_set1_epi32(1 << 31);
3554 let r = _mm512_mask_shldi_epi32::<2>(a, 0, a, b);
3555 assert_eq_m512i(r, a);
3556 let r = _mm512_mask_shldi_epi32::<2>(a, 0b11111111_11111111, a, b);
3557 let e = _mm512_set1_epi32(6);
3558 assert_eq_m512i(r, e);
3559 }
3560
3561 #[simd_test(enable = "avx512vbmi2")]
3562 unsafe fn test_mm512_maskz_shldi_epi32() {
3563 let a = _mm512_set1_epi32(1);
3564 let b = _mm512_set1_epi32(1 << 31);
3565 let r = _mm512_maskz_shldi_epi32::<2>(0, a, b);
3566 assert_eq_m512i(r, _mm512_setzero_si512());
3567 let r = _mm512_maskz_shldi_epi32::<2>(0b11111111_11111111, a, b);
3568 let e = _mm512_set1_epi32(6);
3569 assert_eq_m512i(r, e);
3570 }
3571
3572 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3573 unsafe fn test_mm256_shldi_epi32() {
3574 let a = _mm256_set1_epi32(1);
3575 let b = _mm256_set1_epi32(1 << 31);
3576 let r = _mm256_shldi_epi32::<2>(a, b);
3577 let e = _mm256_set1_epi32(6);
3578 assert_eq_m256i(r, e);
3579 }
3580
3581 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3582 unsafe fn test_mm256_mask_shldi_epi32() {
3583 let a = _mm256_set1_epi32(1);
3584 let b = _mm256_set1_epi32(1 << 31);
3585 let r = _mm256_mask_shldi_epi32::<2>(a, 0, a, b);
3586 assert_eq_m256i(r, a);
3587 let r = _mm256_mask_shldi_epi32::<2>(a, 0b11111111, a, b);
3588 let e = _mm256_set1_epi32(6);
3589 assert_eq_m256i(r, e);
3590 }
3591
3592 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3593 unsafe fn test_mm256_maskz_shldi_epi32() {
3594 let a = _mm256_set1_epi32(1);
3595 let b = _mm256_set1_epi32(1 << 31);
3596 let r = _mm256_maskz_shldi_epi32::<2>(0, a, b);
3597 assert_eq_m256i(r, _mm256_setzero_si256());
3598 let r = _mm256_maskz_shldi_epi32::<2>(0b11111111, a, b);
3599 let e = _mm256_set1_epi32(6);
3600 assert_eq_m256i(r, e);
3601 }
3602
3603 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3604 unsafe fn test_mm_shldi_epi32() {
3605 let a = _mm_set1_epi32(1);
3606 let b = _mm_set1_epi32(1 << 31);
3607 let r = _mm_shldi_epi32::<2>(a, b);
3608 let e = _mm_set1_epi32(6);
3609 assert_eq_m128i(r, e);
3610 }
3611
3612 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3613 unsafe fn test_mm_mask_shldi_epi32() {
3614 let a = _mm_set1_epi32(1);
3615 let b = _mm_set1_epi32(1 << 31);
3616 let r = _mm_mask_shldi_epi32::<2>(a, 0, a, b);
3617 assert_eq_m128i(r, a);
3618 let r = _mm_mask_shldi_epi32::<2>(a, 0b00001111, a, b);
3619 let e = _mm_set1_epi32(6);
3620 assert_eq_m128i(r, e);
3621 }
3622
3623 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3624 unsafe fn test_mm_maskz_shldi_epi32() {
3625 let a = _mm_set1_epi32(1);
3626 let b = _mm_set1_epi32(1 << 31);
3627 let r = _mm_maskz_shldi_epi32::<2>(0, a, b);
3628 assert_eq_m128i(r, _mm_setzero_si128());
3629 let r = _mm_maskz_shldi_epi32::<2>(0b00001111, a, b);
3630 let e = _mm_set1_epi32(6);
3631 assert_eq_m128i(r, e);
3632 }
3633
3634 #[simd_test(enable = "avx512vbmi2")]
3635 unsafe fn test_mm512_shldi_epi16() {
3636 let a = _mm512_set1_epi16(1);
3637 let b = _mm512_set1_epi16(1 << 15);
3638 let r = _mm512_shldi_epi16::<2>(a, b);
3639 let e = _mm512_set1_epi16(6);
3640 assert_eq_m512i(r, e);
3641 }
3642
3643 #[simd_test(enable = "avx512vbmi2")]
3644 unsafe fn test_mm512_mask_shldi_epi16() {
3645 let a = _mm512_set1_epi16(1);
3646 let b = _mm512_set1_epi16(1 << 15);
3647 let r = _mm512_mask_shldi_epi16::<2>(a, 0, a, b);
3648 assert_eq_m512i(r, a);
3649 let r = _mm512_mask_shldi_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a, b);
3650 let e = _mm512_set1_epi16(6);
3651 assert_eq_m512i(r, e);
3652 }
3653
3654 #[simd_test(enable = "avx512vbmi2")]
3655 unsafe fn test_mm512_maskz_shldi_epi16() {
3656 let a = _mm512_set1_epi16(1);
3657 let b = _mm512_set1_epi16(1 << 15);
3658 let r = _mm512_maskz_shldi_epi16::<2>(0, a, b);
3659 assert_eq_m512i(r, _mm512_setzero_si512());
3660 let r = _mm512_maskz_shldi_epi16::<2>(0b11111111_11111111_11111111_11111111, a, b);
3661 let e = _mm512_set1_epi16(6);
3662 assert_eq_m512i(r, e);
3663 }
3664
3665 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3666 unsafe fn test_mm256_shldi_epi16() {
3667 let a = _mm256_set1_epi16(1);
3668 let b = _mm256_set1_epi16(1 << 15);
3669 let r = _mm256_shldi_epi16::<2>(a, b);
3670 let e = _mm256_set1_epi16(6);
3671 assert_eq_m256i(r, e);
3672 }
3673
3674 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3675 unsafe fn test_mm256_mask_shldi_epi16() {
3676 let a = _mm256_set1_epi16(1);
3677 let b = _mm256_set1_epi16(1 << 15);
3678 let r = _mm256_mask_shldi_epi16::<2>(a, 0, a, b);
3679 assert_eq_m256i(r, a);
3680 let r = _mm256_mask_shldi_epi16::<2>(a, 0b11111111_11111111, a, b);
3681 let e = _mm256_set1_epi16(6);
3682 assert_eq_m256i(r, e);
3683 }
3684
3685 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3686 unsafe fn test_mm256_maskz_shldi_epi16() {
3687 let a = _mm256_set1_epi16(1);
3688 let b = _mm256_set1_epi16(1 << 15);
3689 let r = _mm256_maskz_shldi_epi16::<2>(0, a, b);
3690 assert_eq_m256i(r, _mm256_setzero_si256());
3691 let r = _mm256_maskz_shldi_epi16::<2>(0b11111111_11111111, a, b);
3692 let e = _mm256_set1_epi16(6);
3693 assert_eq_m256i(r, e);
3694 }
3695
3696 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3697 unsafe fn test_mm_shldi_epi16() {
3698 let a = _mm_set1_epi16(1);
3699 let b = _mm_set1_epi16(1 << 15);
3700 let r = _mm_shldi_epi16::<2>(a, b);
3701 let e = _mm_set1_epi16(6);
3702 assert_eq_m128i(r, e);
3703 }
3704
3705 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3706 unsafe fn test_mm_mask_shldi_epi16() {
3707 let a = _mm_set1_epi16(1);
3708 let b = _mm_set1_epi16(1 << 15);
3709 let r = _mm_mask_shldi_epi16::<2>(a, 0, a, b);
3710 assert_eq_m128i(r, a);
3711 let r = _mm_mask_shldi_epi16::<2>(a, 0b11111111, a, b);
3712 let e = _mm_set1_epi16(6);
3713 assert_eq_m128i(r, e);
3714 }
3715
3716 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3717 unsafe fn test_mm_maskz_shldi_epi16() {
3718 let a = _mm_set1_epi16(1);
3719 let b = _mm_set1_epi16(1 << 15);
3720 let r = _mm_maskz_shldi_epi16::<2>(0, a, b);
3721 assert_eq_m128i(r, _mm_setzero_si128());
3722 let r = _mm_maskz_shldi_epi16::<2>(0b11111111, a, b);
3723 let e = _mm_set1_epi16(6);
3724 assert_eq_m128i(r, e);
3725 }
3726
3727 #[simd_test(enable = "avx512vbmi2")]
3728 unsafe fn test_mm512_shrdi_epi64() {
3729 let a = _mm512_set1_epi64(8);
3730 let b = _mm512_set1_epi64(2);
3731 let r = _mm512_shrdi_epi64::<1>(a, b);
3732 let e = _mm512_set1_epi64(1);
3733 assert_eq_m512i(r, e);
3734 }
3735
3736 #[simd_test(enable = "avx512vbmi2")]
3737 unsafe fn test_mm512_mask_shrdi_epi64() {
3738 let a = _mm512_set1_epi64(8);
3739 let b = _mm512_set1_epi64(2);
3740 let r = _mm512_mask_shrdi_epi64::<1>(a, 0, a, b);
3741 assert_eq_m512i(r, a);
3742 let r = _mm512_mask_shrdi_epi64::<1>(a, 0b11111111, a, b);
3743 let e = _mm512_set1_epi64(1);
3744 assert_eq_m512i(r, e);
3745 }
3746
3747 #[simd_test(enable = "avx512vbmi2")]
3748 unsafe fn test_mm512_maskz_shrdi_epi64() {
3749 let a = _mm512_set1_epi64(8);
3750 let b = _mm512_set1_epi64(2);
3751 let r = _mm512_maskz_shrdi_epi64::<1>(0, a, b);
3752 assert_eq_m512i(r, _mm512_setzero_si512());
3753 let r = _mm512_maskz_shrdi_epi64::<1>(0b11111111, a, b);
3754 let e = _mm512_set1_epi64(1);
3755 assert_eq_m512i(r, e);
3756 }
3757
3758 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3759 unsafe fn test_mm256_shrdi_epi64() {
3760 let a = _mm256_set1_epi64x(8);
3761 let b = _mm256_set1_epi64x(2);
3762 let r = _mm256_shrdi_epi64::<1>(a, b);
3763 let e = _mm256_set1_epi64x(1);
3764 assert_eq_m256i(r, e);
3765 }
3766
3767 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3768 unsafe fn test_mm256_mask_shrdi_epi64() {
3769 let a = _mm256_set1_epi64x(8);
3770 let b = _mm256_set1_epi64x(2);
3771 let r = _mm256_mask_shrdi_epi64::<1>(a, 0, a, b);
3772 assert_eq_m256i(r, a);
3773 let r = _mm256_mask_shrdi_epi64::<1>(a, 0b00001111, a, b);
3774 let e = _mm256_set1_epi64x(1);
3775 assert_eq_m256i(r, e);
3776 }
3777
3778 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3779 unsafe fn test_mm256_maskz_shrdi_epi64() {
3780 let a = _mm256_set1_epi64x(8);
3781 let b = _mm256_set1_epi64x(2);
3782 let r = _mm256_maskz_shrdi_epi64::<1>(0, a, b);
3783 assert_eq_m256i(r, _mm256_setzero_si256());
3784 let r = _mm256_maskz_shrdi_epi64::<1>(0b00001111, a, b);
3785 let e = _mm256_set1_epi64x(1);
3786 assert_eq_m256i(r, e);
3787 }
3788
3789 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3790 unsafe fn test_mm_shrdi_epi64() {
3791 let a = _mm_set1_epi64x(8);
3792 let b = _mm_set1_epi64x(2);
3793 let r = _mm_shrdi_epi64::<1>(a, b);
3794 let e = _mm_set1_epi64x(1);
3795 assert_eq_m128i(r, e);
3796 }
3797
3798 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3799 unsafe fn test_mm_mask_shrdi_epi64() {
3800 let a = _mm_set1_epi64x(8);
3801 let b = _mm_set1_epi64x(2);
3802 let r = _mm_mask_shrdi_epi64::<1>(a, 0, a, b);
3803 assert_eq_m128i(r, a);
3804 let r = _mm_mask_shrdi_epi64::<1>(a, 0b00000011, a, b);
3805 let e = _mm_set1_epi64x(1);
3806 assert_eq_m128i(r, e);
3807 }
3808
3809 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3810 unsafe fn test_mm_maskz_shrdi_epi64() {
3811 let a = _mm_set1_epi64x(8);
3812 let b = _mm_set1_epi64x(2);
3813 let r = _mm_maskz_shrdi_epi64::<1>(0, a, b);
3814 assert_eq_m128i(r, _mm_setzero_si128());
3815 let r = _mm_maskz_shrdi_epi64::<1>(0b00000011, a, b);
3816 let e = _mm_set1_epi64x(1);
3817 assert_eq_m128i(r, e);
3818 }
3819
3820 #[simd_test(enable = "avx512vbmi2")]
3821 unsafe fn test_mm512_shrdi_epi32() {
3822 let a = _mm512_set1_epi32(8);
3823 let b = _mm512_set1_epi32(2);
3824 let r = _mm512_shrdi_epi32::<1>(a, b);
3825 let e = _mm512_set1_epi32(1);
3826 assert_eq_m512i(r, e);
3827 }
3828
3829 #[simd_test(enable = "avx512vbmi2")]
3830 unsafe fn test_mm512_mask_shrdi_epi32() {
3831 let a = _mm512_set1_epi32(8);
3832 let b = _mm512_set1_epi32(2);
3833 let r = _mm512_mask_shrdi_epi32::<1>(a, 0, a, b);
3834 assert_eq_m512i(r, a);
3835 let r = _mm512_mask_shrdi_epi32::<1>(a, 0b11111111_11111111, a, b);
3836 let e = _mm512_set1_epi32(1);
3837 assert_eq_m512i(r, e);
3838 }
3839
3840 #[simd_test(enable = "avx512vbmi2")]
3841 unsafe fn test_mm512_maskz_shrdi_epi32() {
3842 let a = _mm512_set1_epi32(8);
3843 let b = _mm512_set1_epi32(2);
3844 let r = _mm512_maskz_shrdi_epi32::<1>(0, a, b);
3845 assert_eq_m512i(r, _mm512_setzero_si512());
3846 let r = _mm512_maskz_shrdi_epi32::<1>(0b11111111_11111111, a, b);
3847 let e = _mm512_set1_epi32(1);
3848 assert_eq_m512i(r, e);
3849 }
3850
3851 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3852 unsafe fn test_mm256_shrdi_epi32() {
3853 let a = _mm256_set1_epi32(8);
3854 let b = _mm256_set1_epi32(2);
3855 let r = _mm256_shrdi_epi32::<1>(a, b);
3856 let e = _mm256_set1_epi32(1);
3857 assert_eq_m256i(r, e);
3858 }
3859
3860 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3861 unsafe fn test_mm256_mask_shrdi_epi32() {
3862 let a = _mm256_set1_epi32(8);
3863 let b = _mm256_set1_epi32(2);
3864 let r = _mm256_mask_shrdi_epi32::<1>(a, 0, a, b);
3865 assert_eq_m256i(r, a);
3866 let r = _mm256_mask_shrdi_epi32::<1>(a, 0b11111111, a, b);
3867 let e = _mm256_set1_epi32(1);
3868 assert_eq_m256i(r, e);
3869 }
3870
3871 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3872 unsafe fn test_mm256_maskz_shrdi_epi32() {
3873 let a = _mm256_set1_epi32(8);
3874 let b = _mm256_set1_epi32(2);
3875 let r = _mm256_maskz_shrdi_epi32::<1>(0, a, b);
3876 assert_eq_m256i(r, _mm256_setzero_si256());
3877 let r = _mm256_maskz_shrdi_epi32::<1>(0b11111111, a, b);
3878 let e = _mm256_set1_epi32(1);
3879 assert_eq_m256i(r, e);
3880 }
3881
3882 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3883 unsafe fn test_mm_shrdi_epi32() {
3884 let a = _mm_set1_epi32(8);
3885 let b = _mm_set1_epi32(2);
3886 let r = _mm_shrdi_epi32::<1>(a, b);
3887 let e = _mm_set1_epi32(1);
3888 assert_eq_m128i(r, e);
3889 }
3890
3891 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3892 unsafe fn test_mm_mask_shrdi_epi32() {
3893 let a = _mm_set1_epi32(8);
3894 let b = _mm_set1_epi32(2);
3895 let r = _mm_mask_shrdi_epi32::<1>(a, 0, a, b);
3896 assert_eq_m128i(r, a);
3897 let r = _mm_mask_shrdi_epi32::<1>(a, 0b00001111, a, b);
3898 let e = _mm_set1_epi32(1);
3899 assert_eq_m128i(r, e);
3900 }
3901
3902 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3903 unsafe fn test_mm_maskz_shrdi_epi32() {
3904 let a = _mm_set1_epi32(8);
3905 let b = _mm_set1_epi32(2);
3906 let r = _mm_maskz_shrdi_epi32::<1>(0, a, b);
3907 assert_eq_m128i(r, _mm_setzero_si128());
3908 let r = _mm_maskz_shrdi_epi32::<1>(0b00001111, a, b);
3909 let e = _mm_set1_epi32(1);
3910 assert_eq_m128i(r, e);
3911 }
3912
3913 #[simd_test(enable = "avx512vbmi2")]
3914 unsafe fn test_mm512_shrdi_epi16() {
3915 let a = _mm512_set1_epi16(8);
3916 let b = _mm512_set1_epi16(2);
3917 let r = _mm512_shrdi_epi16::<1>(a, b);
3918 let e = _mm512_set1_epi16(1);
3919 assert_eq_m512i(r, e);
3920 }
3921
3922 #[simd_test(enable = "avx512vbmi2")]
3923 unsafe fn test_mm512_mask_shrdi_epi16() {
3924 let a = _mm512_set1_epi16(8);
3925 let b = _mm512_set1_epi16(2);
3926 let r = _mm512_mask_shrdi_epi16::<1>(a, 0, a, b);
3927 assert_eq_m512i(r, a);
3928 let r = _mm512_mask_shrdi_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a, b);
3929 let e = _mm512_set1_epi16(1);
3930 assert_eq_m512i(r, e);
3931 }
3932
3933 #[simd_test(enable = "avx512vbmi2")]
3934 unsafe fn test_mm512_maskz_shrdi_epi16() {
3935 let a = _mm512_set1_epi16(8);
3936 let b = _mm512_set1_epi16(2);
3937 let r = _mm512_maskz_shrdi_epi16::<1>(0, a, b);
3938 assert_eq_m512i(r, _mm512_setzero_si512());
3939 let r = _mm512_maskz_shrdi_epi16::<1>(0b11111111_11111111_11111111_11111111, a, b);
3940 let e = _mm512_set1_epi16(1);
3941 assert_eq_m512i(r, e);
3942 }
3943
3944 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3945 unsafe fn test_mm256_shrdi_epi16() {
3946 let a = _mm256_set1_epi16(8);
3947 let b = _mm256_set1_epi16(2);
3948 let r = _mm256_shrdi_epi16::<1>(a, b);
3949 let e = _mm256_set1_epi16(1);
3950 assert_eq_m256i(r, e);
3951 }
3952
3953 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3954 unsafe fn test_mm256_mask_shrdi_epi16() {
3955 let a = _mm256_set1_epi16(8);
3956 let b = _mm256_set1_epi16(2);
3957 let r = _mm256_mask_shrdi_epi16::<1>(a, 0, a, b);
3958 assert_eq_m256i(r, a);
3959 let r = _mm256_mask_shrdi_epi16::<1>(a, 0b11111111_11111111, a, b);
3960 let e = _mm256_set1_epi16(1);
3961 assert_eq_m256i(r, e);
3962 }
3963
3964 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3965 unsafe fn test_mm256_maskz_shrdi_epi16() {
3966 let a = _mm256_set1_epi16(8);
3967 let b = _mm256_set1_epi16(2);
3968 let r = _mm256_maskz_shrdi_epi16::<1>(0, a, b);
3969 assert_eq_m256i(r, _mm256_setzero_si256());
3970 let r = _mm256_maskz_shrdi_epi16::<1>(0b11111111_11111111, a, b);
3971 let e = _mm256_set1_epi16(1);
3972 assert_eq_m256i(r, e);
3973 }
3974
3975 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3976 unsafe fn test_mm_shrdi_epi16() {
3977 let a = _mm_set1_epi16(8);
3978 let b = _mm_set1_epi16(2);
3979 let r = _mm_shrdi_epi16::<1>(a, b);
3980 let e = _mm_set1_epi16(1);
3981 assert_eq_m128i(r, e);
3982 }
3983
3984 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3985 unsafe fn test_mm_mask_shrdi_epi16() {
3986 let a = _mm_set1_epi16(8);
3987 let b = _mm_set1_epi16(2);
3988 let r = _mm_mask_shrdi_epi16::<1>(a, 0, a, b);
3989 assert_eq_m128i(r, a);
3990 let r = _mm_mask_shrdi_epi16::<1>(a, 0b11111111, a, b);
3991 let e = _mm_set1_epi16(1);
3992 assert_eq_m128i(r, e);
3993 }
3994
3995 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3996 unsafe fn test_mm_maskz_shrdi_epi16() {
3997 let a = _mm_set1_epi16(8);
3998 let b = _mm_set1_epi16(2);
3999 let r = _mm_maskz_shrdi_epi16::<1>(0, a, b);
4000 assert_eq_m128i(r, _mm_setzero_si128());
4001 let r = _mm_maskz_shrdi_epi16::<1>(0b11111111, a, b);
4002 let e = _mm_set1_epi16(1);
4003 assert_eq_m128i(r, e);
4004 }
4005
4006 #[simd_test(enable = "avx512vbmi2")]
4007 unsafe fn test_mm512_mask_expandloadu_epi16() {
4008 let src = _mm512_set1_epi16(42);
4009 let a = &[
4010 1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
4011 24, 25, 26, 27, 28, 29, 30, 31, 32,
4012 ];
4013 let p = a.as_ptr();
4014 let m = 0b11101000_11001010_11110000_00001111;
4015 let r = _mm512_mask_expandloadu_epi16(src, m, black_box(p));
4016 let e = _mm512_set_epi16(
4017 16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
4018 42, 42, 42, 42, 42, 4, 3, 2, 1,
4019 );
4020 assert_eq_m512i(r, e);
4021 }
4022
4023 #[simd_test(enable = "avx512vbmi2")]
4024 unsafe fn test_mm512_maskz_expandloadu_epi16() {
4025 let a = &[
4026 1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
4027 24, 25, 26, 27, 28, 29, 30, 31, 32,
4028 ];
4029 let p = a.as_ptr();
4030 let m = 0b11101000_11001010_11110000_00001111;
4031 let r = _mm512_maskz_expandloadu_epi16(m, black_box(p));
4032 let e = _mm512_set_epi16(
4033 16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
4034 0, 4, 3, 2, 1,
4035 );
4036 assert_eq_m512i(r, e);
4037 }
4038
4039 #[simd_test(enable = "avx512vbmi2,avx512vl")]
4040 unsafe fn test_mm256_mask_expandloadu_epi16() {
4041 let src = _mm256_set1_epi16(42);
4042 let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
4043 let p = a.as_ptr();
4044 let m = 0b11101000_11001010;
4045 let r = _mm256_mask_expandloadu_epi16(src, m, black_box(p));
4046 let e = _mm256_set_epi16(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
4047 assert_eq_m256i(r, e);
4048 }
4049
4050 #[simd_test(enable = "avx512vbmi2,avx512vl")]
4051 unsafe fn test_mm256_maskz_expandloadu_epi16() {
4052 let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
4053 let p = a.as_ptr();
4054 let m = 0b11101000_11001010;
4055 let r = _mm256_maskz_expandloadu_epi16(m, black_box(p));
4056 let e = _mm256_set_epi16(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
4057 assert_eq_m256i(r, e);
4058 }
4059
4060 #[simd_test(enable = "avx512vbmi2,avx512vl")]
4061 unsafe fn test_mm_mask_expandloadu_epi16() {
4062 let src = _mm_set1_epi16(42);
4063 let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
4064 let p = a.as_ptr();
4065 let m = 0b11101000;
4066 let r = _mm_mask_expandloadu_epi16(src, m, black_box(p));
4067 let e = _mm_set_epi16(4, 3, 2, 42, 1, 42, 42, 42);
4068 assert_eq_m128i(r, e);
4069 }
4070
4071 #[simd_test(enable = "avx512vbmi2,avx512vl")]
4072 unsafe fn test_mm_maskz_expandloadu_epi16() {
4073 let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
4074 let p = a.as_ptr();
4075 let m = 0b11101000;
4076 let r = _mm_maskz_expandloadu_epi16(m, black_box(p));
4077 let e = _mm_set_epi16(4, 3, 2, 0, 1, 0, 0, 0);
4078 assert_eq_m128i(r, e);
4079 }
4080
4081 #[simd_test(enable = "avx512vbmi2")]
4082 unsafe fn test_mm512_mask_expandloadu_epi8() {
4083 let src = _mm512_set1_epi8(42);
4084 let a = &[
4085 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
4086 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
4087 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
4088 ];
4089 let p = a.as_ptr();
4090 let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
4091 let r = _mm512_mask_expandloadu_epi8(src, m, black_box(p));
4092 let e = _mm512_set_epi8(
4093 32, 31, 30, 42, 29, 42, 42, 42, 28, 27, 42, 42, 26, 42, 25, 42, 24, 23, 22, 21, 42, 42,
4094 42, 42, 42, 42, 42, 42, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 42, 42, 42, 42,
4095 42, 42, 42, 42, 8, 42, 7, 42, 6, 42, 5, 42, 42, 4, 42, 3, 42, 2, 42, 1,
4096 );
4097 assert_eq_m512i(r, e);
4098 }
4099
4100 #[simd_test(enable = "avx512vbmi2")]
4101 unsafe fn test_mm512_maskz_expandloadu_epi8() {
4102 let a = &[
4103 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
4104 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
4105 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
4106 ];
4107 let p = a.as_ptr();
4108 let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
4109 let r = _mm512_maskz_expandloadu_epi8(m, black_box(p));
4110 let e = _mm512_set_epi8(
4111 32, 31, 30, 0, 29, 0, 0, 0, 28, 27, 0, 0, 26, 0, 25, 0, 24, 23, 22, 21, 0, 0, 0, 0, 0,
4112 0, 0, 0, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0,
4113 7, 0, 6, 0, 5, 0, 0, 4, 0, 3, 0, 2, 0, 1,
4114 );
4115 assert_eq_m512i(r, e);
4116 }
4117
4118 #[simd_test(enable = "avx512vbmi2,avx512vl")]
4119 unsafe fn test_mm256_mask_expandloadu_epi8() {
4120 let src = _mm256_set1_epi8(42);
4121 let a = &[
4122 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
4123 24, 25, 26, 27, 28, 29, 30, 31, 32,
4124 ];
4125 let p = a.as_ptr();
4126 let m = 0b11101000_11001010_11110000_00001111;
4127 let r = _mm256_mask_expandloadu_epi8(src, m, black_box(p));
4128 let e = _mm256_set_epi8(
4129 16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
4130 42, 42, 42, 42, 42, 4, 3, 2, 1,
4131 );
4132 assert_eq_m256i(r, e);
4133 }
4134
4135 #[simd_test(enable = "avx512vbmi2,avx512vl")]
4136 unsafe fn test_mm256_maskz_expandloadu_epi8() {
4137 let a = &[
4138 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
4139 24, 25, 26, 27, 28, 29, 30, 31, 32,
4140 ];
4141 let p = a.as_ptr();
4142 let m = 0b11101000_11001010_11110000_00001111;
4143 let r = _mm256_maskz_expandloadu_epi8(m, black_box(p));
4144 let e = _mm256_set_epi8(
4145 16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
4146 0, 4, 3, 2, 1,
4147 );
4148 assert_eq_m256i(r, e);
4149 }
4150
4151 #[simd_test(enable = "avx512vbmi2,avx512vl")]
4152 unsafe fn test_mm_mask_expandloadu_epi8() {
4153 let src = _mm_set1_epi8(42);
4154 let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
4155 let p = a.as_ptr();
4156 let m = 0b11101000_11001010;
4157 let r = _mm_mask_expandloadu_epi8(src, m, black_box(p));
4158 let e = _mm_set_epi8(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
4159 assert_eq_m128i(r, e);
4160 }
4161
4162 #[simd_test(enable = "avx512vbmi2,avx512vl")]
4163 unsafe fn test_mm_maskz_expandloadu_epi8() {
4164 let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
4165 let p = a.as_ptr();
4166 let m = 0b11101000_11001010;
4167 let r = _mm_maskz_expandloadu_epi8(m, black_box(p));
4168 let e = _mm_set_epi8(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
4169 assert_eq_m128i(r, e);
4170 }
4171
4172 #[simd_test(enable = "avx512vbmi2")]
4173 unsafe fn test_mm512_mask_compressstoreu_epi16() {
4174 let a = _mm512_set_epi16(
4175 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
4176 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
4177 );
4178 let mut r = [0_i16; 32];
4179 _mm512_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a);
4180 assert_eq!(&r, &[0_i16; 32]);
4181 _mm512_mask_compressstoreu_epi16(
4182 r.as_mut_ptr() as *mut _,
4183 0b11110000_11001010_11111111_00000000,
4184 a,
4185 );
4186 assert_eq!(
4187 &r,
4188 &[
4189 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0,
4190 0, 0, 0, 0, 0, 0, 0, 0, 0
4191 ]
4192 );
4193 }
4194
4195 #[simd_test(enable = "avx512vbmi2,avx512vl")]
4196 unsafe fn test_mm256_mask_compressstoreu_epi16() {
4197 let a = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
4198 let mut r = [0_i16; 16];
4199 _mm256_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a);
4200 assert_eq!(&r, &[0_i16; 16]);
4201 _mm256_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0b11110000_11001010, a);
4202 assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
4203 }
4204
4205 #[simd_test(enable = "avx512vbmi2,avx512vl")]
4206 unsafe fn test_mm_mask_compressstoreu_epi16() {
4207 let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
4208 let mut r = [0_i16; 8];
4209 _mm_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a);
4210 assert_eq!(&r, &[0_i16; 8]);
4211 _mm_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0b11110000, a);
4212 assert_eq!(&r, &[5, 6, 7, 8, 0, 0, 0, 0]);
4213 }
4214
4215 #[simd_test(enable = "avx512vbmi2")]
4216 unsafe fn test_mm512_mask_compressstoreu_epi8() {
4217 let a = _mm512_set_epi8(
4218 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43,
4219 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21,
4220 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
4221 );
4222 let mut r = [0_i8; 64];
4223 _mm512_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a);
4224 assert_eq!(&r, &[0_i8; 64]);
4225 _mm512_mask_compressstoreu_epi8(
4226 r.as_mut_ptr() as *mut _,
4227 0b11110000_11001010_11111111_00000000_10101010_01010101_11110000_00001111,
4228 a,
4229 );
4230 assert_eq!(
4231 &r,
4232 &[
4233 1, 2, 3, 4, 13, 14, 15, 16, 17, 19, 21, 23, 26, 28, 30, 32, 41, 42, 43, 44, 45, 46,
4234 47, 48, 50, 52, 55, 56, 61, 62, 63, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4235 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4236 ]
4237 );
4238 }
4239
4240 #[simd_test(enable = "avx512vbmi2,avx512vl")]
4241 unsafe fn test_mm256_mask_compressstoreu_epi8() {
4242 let a = _mm256_set_epi8(
4243 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
4244 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
4245 );
4246 let mut r = [0_i8; 32];
4247 _mm256_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a);
4248 assert_eq!(&r, &[0_i8; 32]);
4249 _mm256_mask_compressstoreu_epi8(
4250 r.as_mut_ptr() as *mut _,
4251 0b11110000_11001010_11111111_00000000,
4252 a,
4253 );
4254 assert_eq!(
4255 &r,
4256 &[
4257 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0,
4258 0, 0, 0, 0, 0, 0, 0, 0, 0
4259 ]
4260 );
4261 }
4262
4263 #[simd_test(enable = "avx512vbmi2,avx512vl")]
4264 unsafe fn test_mm_mask_compressstoreu_epi8() {
4265 let a = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
4266 let mut r = [0_i8; 16];
4267 _mm_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a);
4268 assert_eq!(&r, &[0_i8; 16]);
4269 _mm_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0b11110000_11001010, a);
4270 assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
4271 }
4272}
4273