//! Advanced Vector Extensions 2 (AVX2)
//!
//! AVX2 expands most AVX commands to 256-bit wide vector registers and
//! adds [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate).
//!
//! The references are:
//!
//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
//!   Instruction Set Reference, A-Z][intel64_ref].
//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
//!   System Instructions][amd64_ref].
//!
//! Wikipedia's [AVX][wiki_avx] and [FMA][wiki_fma] pages provide a quick
//! overview of the instructions available.
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate

use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;

#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi32)
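///
/// # Example
///
/// A minimal usage sketch (not from Intel's documentation), assuming AVX2
/// is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi32(-7);
///             let r = _mm256_abs_epi32(a);
///             let mut out = [0i32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [7; 8]);
///         }
///     }
/// }
/// ```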
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi32(a: __m256i) -> __m256i {
    unsafe {
        let a: i32x8 = a.as_i32x8();
        let r: i32x8 = simd_select::<m32x8, _>(simd_lt(a, i32x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute values of packed 16-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi16(a: __m256i) -> __m256i {
    unsafe {
        let a: i16x16 = a.as_i16x16();
        let r: i16x16 = simd_select::<m16x16, _>(simd_lt(a, i16x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute values of packed 8-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi8(a: __m256i) -> __m256i {
    unsafe {
        let a: i8x32 = a.as_i8x32();
        let r: i8x32 = simd_select::<m8x32, _>(simd_lt(a, i8x32::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Adds packed 64-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i64x4(), b.as_i64x4())) }
}

/// Adds packed 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i32x8(), b.as_i32x8())) }
}

/// Adds packed 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i16x16(), b.as_i16x16())) }
}

/// Adds packed 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i8x32(), b.as_i8x32())) }
}

/// Adds packed 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epi8)
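///
/// # Example
///
/// A minimal sketch (not from Intel's documentation) showing that the sum
/// saturates instead of wrapping, assuming AVX2 is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi8(120);
///             let b = _mm256_set1_epi8(20);
///             // 120 + 20 saturates to i8::MAX instead of wrapping.
///             let r = _mm256_adds_epi8(a, b);
///             let mut out = [0i8; 32];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [127; 32]);
///         }
///     }
/// }
/// ```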
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32())) }
}

/// Adds packed 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16())) }
}

/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epu8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32())) }
}

/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epu16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16())) }
}

/// Concatenates pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary
/// result, shifts the result right by `IMM8` bytes, and returns the low 16 bytes.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi8)
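///
/// # Example
///
/// A minimal sketch (not from Intel's documentation) showing that the shift
/// happens independently within each 128-bit lane, assuming AVX2 is
/// detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi8(2);
///             let b = _mm256_set1_epi8(1);
///             // Within each 128-bit lane: the low 12 bytes come from `b`
///             // (shifted down by 4), the top 4 bytes slide in from `a`.
///             let r = _mm256_alignr_epi8::<4>(a, b);
///             let mut out = [0i8; 32];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             let mut expected = [1i8; 32];
///             expected[12..16].fill(2);
///             expected[28..32].fill(2);
///             assert_eq!(out, expected);
///         }
///     }
/// }
/// ```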
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);

    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if IMM8 >= 32 {
        return _mm256_setzero_si256();
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b) = if IMM8 > 16 {
        (_mm256_setzero_si256(), a)
    } else {
        (a, b)
    };
    unsafe {
        if IMM8 == 16 {
            return transmute(a);
        }
    }
    const fn mask(shift: u32, i: u32) -> u32 {
        let shift = shift % 16;
        let mod_i = i % 16;
        if mod_i < (16 - shift) {
            i + shift
        } else {
            i + 16 + shift
        }
    }

    unsafe {
        let r: i8x32 = simd_shuffle!(
            b.as_i8x32(),
            a.as_i8x32(),
            [
                mask(IMM8 as u32, 0),
                mask(IMM8 as u32, 1),
                mask(IMM8 as u32, 2),
                mask(IMM8 as u32, 3),
                mask(IMM8 as u32, 4),
                mask(IMM8 as u32, 5),
                mask(IMM8 as u32, 6),
                mask(IMM8 as u32, 7),
                mask(IMM8 as u32, 8),
                mask(IMM8 as u32, 9),
                mask(IMM8 as u32, 10),
                mask(IMM8 as u32, 11),
                mask(IMM8 as u32, 12),
                mask(IMM8 as u32, 13),
                mask(IMM8 as u32, 14),
                mask(IMM8 as u32, 15),
                mask(IMM8 as u32, 16),
                mask(IMM8 as u32, 17),
                mask(IMM8 as u32, 18),
                mask(IMM8 as u32, 19),
                mask(IMM8 as u32, 20),
                mask(IMM8 as u32, 21),
                mask(IMM8 as u32, 22),
                mask(IMM8 as u32, 23),
                mask(IMM8 as u32, 24),
                mask(IMM8 as u32, 25),
                mask(IMM8 as u32, 26),
                mask(IMM8 as u32, 27),
                mask(IMM8 as u32, 28),
                mask(IMM8 as u32, 29),
                mask(IMM8 as u32, 30),
                mask(IMM8 as u32, 31),
            ],
        );
        transmute(r)
    }
}

/// Computes the bitwise AND of 256 bits (representing integer data)
/// in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_and(a.as_i64x4(), b.as_i64x4())) }
}

/// Computes the bitwise NOT of 256 bits (representing integer data)
/// in `a` and then AND with `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_si256)
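///
/// # Example
///
/// A minimal sketch (not from Intel's documentation) showing that the
/// result keeps exactly the bits that are clear in `a` and set in `b`,
/// assuming AVX2 is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi8(0b0101_0101);
///             let b = _mm256_set1_epi8(-1); // all bits set
///             let r = _mm256_andnot_si256(a, b);
///             let mut out = [0u8; 32];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [0b1010_1010; 32]);
///         }
///     }
/// }
/// ```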
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let all_ones: __m256i = _mm256_set1_epi8(-1);
        transmute(simd_and(
            simd_xor(a.as_i64x4(), all_ones.as_i64x4()),
            b.as_i64x4(),
        ))
    }
}

/// Averages packed unsigned 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_avg_epu16)
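///
/// # Example
///
/// A minimal sketch (not from Intel's documentation) showing that the
/// average is computed as `(a + b + 1) >> 1`, i.e. it rounds up, assuming
/// AVX2 is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi16(5);
///             let b = _mm256_set1_epi16(6);
///             // (5 + 6 + 1) >> 1 == 6: the average rounds up, not down.
///             let r = _mm256_avg_epu16(a, b);
///             let mut out = [0u16; 16];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [6; 16]);
///         }
///     }
/// }
/// ```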
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a: u32x16 = simd_cast::<_, u32x16>(a.as_u16x16());
        let b: u32x16 = simd_cast::<_, u32x16>(b.as_u16x16());
        let r: u32x16 = simd_shr(simd_add(simd_add(a, b), u32x16::splat(1)), u32x16::splat(1));
        transmute(simd_cast::<_, u16x16>(r))
    }
}

/// Averages packed unsigned 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_avg_epu8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a: u16x32 = simd_cast::<_, u16x32>(a.as_u8x32());
        let b: u16x32 = simd_cast::<_, u16x32>(b.as_u8x32());
        let r: u16x32 = simd_shr(simd_add(simd_add(a, b), u16x32::splat(1)), u16x32::splat(1));
        transmute(simd_cast::<_, u8x32>(r))
    }
}

/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM4`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_epi32)
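///
/// # Example
///
/// A minimal sketch (not from Intel's documentation): bit `i` of `IMM4`
/// selects element `i` from `b`, otherwise from `a`. Assuming AVX2 is
/// detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm_set1_epi32(0);
///             let b = _mm_set1_epi32(1);
///             // 0b0101 takes elements 0 and 2 from `b`, 1 and 3 from `a`.
///             let r = _mm_blend_epi32::<0b0101>(a, b);
///             let mut out = [0i32; 4];
///             _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///             assert_eq!(out, [1, 0, 1, 0]);
///         }
///     }
/// }
/// ```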
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM4, 4);
    unsafe {
        let a: i32x4 = a.as_i32x4();
        let b: i32x4 = b.as_i32x4();
        let r: i32x4 = simd_shuffle!(
            a,
            b,
            [
                [0, 4, 0, 4][IMM4 as usize & 0b11],
                [1, 1, 5, 5][IMM4 as usize & 0b11],
                [2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11],
                [3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11],
            ],
        );
        transmute(r)
    }
}

/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a: i32x8 = a.as_i32x8();
        let b: i32x8 = b.as_i32x8();
        let r: i32x8 = simd_shuffle!(
            a,
            b,
            [
                [0, 8, 0, 8][IMM8 as usize & 0b11],
                [1, 1, 9, 9][IMM8 as usize & 0b11],
                [2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 11, 11][(IMM8 as usize >> 2) & 0b11],
                [4, 12, 4, 12][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 13, 13][(IMM8 as usize >> 4) & 0b11],
                [6, 14, 6, 14][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 15, 15][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}

/// Blends packed 16-bit integers from `a` and `b` using control mask `IMM8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendw, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();

        let r: i16x16 = simd_shuffle!(
            a,
            b,
            [
                [0, 16, 0, 16][IMM8 as usize & 0b11],
                [1, 1, 17, 17][IMM8 as usize & 0b11],
                [2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 19, 19][(IMM8 as usize >> 2) & 0b11],
                [4, 20, 4, 20][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 21, 21][(IMM8 as usize >> 4) & 0b11],
                [6, 22, 6, 22][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 23, 23][(IMM8 as usize >> 6) & 0b11],
                [8, 24, 8, 24][IMM8 as usize & 0b11],
                [9, 9, 25, 25][IMM8 as usize & 0b11],
                [10, 26, 10, 26][(IMM8 as usize >> 2) & 0b11],
                [11, 11, 27, 27][(IMM8 as usize >> 2) & 0b11],
                [12, 28, 12, 28][(IMM8 as usize >> 4) & 0b11],
                [13, 13, 29, 29][(IMM8 as usize >> 4) & 0b11],
                [14, 30, 14, 30][(IMM8 as usize >> 6) & 0b11],
                [15, 15, 31, 31][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}

/// Blends packed 8-bit integers from `a` and `b` using `mask`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_epi8)
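///
/// # Example
///
/// A minimal sketch (not from Intel's documentation): only the high bit of
/// each mask byte matters. Assuming AVX2 is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi8(10);
///             let b = _mm256_set1_epi8(20);
///             // A negative mask byte (high bit set) selects from `b`;
///             // anything else selects from `a`.
///             let mask = _mm256_set1_epi8(-128);
///             let r = _mm256_blendv_epi8(a, b, mask);
///             let mut out = [0i8; 32];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [20; 32]);
///         }
///     }
/// }
/// ```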
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
    unsafe {
        let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::ZERO);
        transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32()))
    }
}

/// Broadcasts the low packed 8-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastb_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
    unsafe {
        let ret: i8x16 = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 16]);
        transmute::<i8x16, _>(ret)
    }
}

/// Broadcasts the low packed 8-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastb_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
    unsafe {
        let ret: i8x32 = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 32]);
        transmute::<i8x32, _>(ret)
    }
}

// N.B., `simd_shuffle4` with integer data types for `a` and `b` is
// often compiled to `vbroadcastss`.
/// Broadcasts the low packed 32-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastd_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
    unsafe {
        let ret: i32x4 = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 4]);
        transmute::<i32x4, _>(ret)
    }
}

// N.B., `simd_shuffle4` with integer data types for `a` and `b` is
// often compiled to `vbroadcastss`.
/// Broadcasts the low packed 32-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastd_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
    unsafe {
        let ret: i32x8 = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 8]);
        transmute::<i32x8, _>(ret)
    }
}

/// Broadcasts the low packed 64-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastq_epi64)
#[inline]
#[target_feature(enable = "avx2")]
// Emits `vmovddup` instead of `vpbroadcastq`
// See https://github.com/rust-lang/stdarch/issues/791
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
    unsafe {
        let ret: i64x2 = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
        transmute::<i64x2, _>(ret)
    }
}

/// Broadcasts the low packed 64-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastq_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
    unsafe {
        let ret: i64x4 = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
        transmute::<i64x4, _>(ret)
    }
}

/// Broadcasts the low double-precision (64-bit) floating-point element
/// from `a` to all elements of the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastsd_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2]) }
}

/// Broadcasts the low double-precision (64-bit) floating-point element
/// from `a` to all elements of the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastsd_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4]) }
}

/// Broadcasts 128 bits of integer data from `a` to all 128-bit lanes in
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastsi128_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret: i64x4 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}

// N.B., `broadcastsi128_si256` is often compiled to `vinsertf128` or
// `vbroadcastf128`.
/// Broadcasts 128 bits of integer data from `a` to all 128-bit lanes in
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastsi128_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret: i64x4 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}

/// Broadcasts the low single-precision (32-bit) floating-point element
/// from `a` to all elements of the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastss_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastss_ps(a: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4]) }
}

/// Broadcasts the low single-precision (32-bit) floating-point element
/// from `a` to all elements of the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastss_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8]) }
}

/// Broadcasts the low packed 16-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastw_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
    unsafe {
        let ret: i16x8 = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 8]);
        transmute::<i16x8, _>(ret)
    }
}

/// Broadcasts the low packed 16-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastw_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
    unsafe {
        let ret: i16x16 = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 16]);
        transmute::<i16x16, _>(ret)
    }
}

/// Compares packed 64-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
}

/// Compares packed 32-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
}

/// Compares packed 16-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
}

/// Compares packed 8-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
}

/// Compares packed 64-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
}

/// Compares packed 32-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
}

/// Compares packed 16-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
}

/// Compares packed 8-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8)
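///
/// # Example
///
/// A minimal sketch (not from Intel's documentation): each element becomes
/// all ones where the comparison holds and zero elsewhere. Assuming AVX2 is
/// detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi8(5);
///             let b = _mm256_set1_epi8(3);
///             // Every lane where a > b becomes all ones (-1 as i8).
///             let r = _mm256_cmpgt_epi8(a, b);
///             let mut out = [0i8; 32];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [-1; 32]);
///         }
///     }
/// }
/// ```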
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
}

/// Sign-extend 16-bit integers in `a` to 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_i16x8())) }
}

/// Sign-extend the lower four 16-bit integers in `a` to 64-bit integers.
/// The upper four elements of `a` are unused.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a: i16x8 = a.as_i16x8();
        let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}

/// Sign-extend 32-bit integers in `a` to 64-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_i32x4())) }
}

/// Sign-extend 8-bit integers in `a` to 16-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_i8x16())) }
}

/// Sign-extend the lower eight 8-bit integers in `a` to 32-bit integers.
/// The upper eight elements of `a` are unused.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a: i8x16 = a.as_i8x16();
        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}

/// Sign-extend the lower four 8-bit integers in `a` to 64-bit integers.
/// The upper twelve elements of `a` are unused.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a: i8x16 = a.as_i8x16();
        let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}

/// Zero-extend packed unsigned 16-bit integers in `a` to packed 32-bit
/// integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu16_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_u16x8())) }
}

/// Zero-extend the lower four unsigned 16-bit integers in `a` to 64-bit
/// integers. The upper four elements of `a` are unused.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu16_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a: u16x8 = a.as_u16x8();
        let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}

/// Zero-extend unsigned 32-bit integers in `a` to 64-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_u32x4())) }
}

/// Zero-extend unsigned 8-bit integers in `a` to 16-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_u8x16())) }
}

/// Zero-extend the lower eight unsigned 8-bit integers in `a` to 32-bit
/// integers. The upper eight elements of `a` are unused.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi32)
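///
/// # Example
///
/// A minimal sketch (not from Intel's documentation) showing that only the
/// lower eight bytes are widened, assuming AVX2 is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
///             // Bytes 8..16 of `a` are ignored.
///             let r = _mm256_cvtepu8_epi32(a);
///             let mut out = [0i32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [1, 2, 3, 4, 5, 6, 7, 8]);
///         }
///     }
/// }
/// ```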
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a: u8x16 = a.as_u8x16();
        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}

/// Zero-extend the lower four unsigned 8-bit integers in `a` to 64-bit
/// integers. The upper twelve elements of `a` are unused.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a: u8x16 = a.as_u8x16();
        let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}

/// Extracts 128 bits (of integer data) from `a` selected with `IMM1`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti128_si256)
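///
/// # Example
///
/// A minimal sketch (not from Intel's documentation), assuming AVX2 is
/// detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set_epi64x(4, 3, 2, 1); // elements are [1, 2, 3, 4]
///             // IMM1 = 1 selects the upper 128-bit half.
///             let hi = _mm256_extracti128_si256::<1>(a);
///             let mut out = [0i64; 2];
///             _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, hi);
///             assert_eq!(out, [3, 4]);
///         }
///     }
/// }
/// ```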
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a: i64x4 = a.as_i64x4();
        let b: i64x4 = i64x4::ZERO;
        let dst: i64x2 = simd_shuffle!(a, b, [[0, 1], [2, 3]][IMM1 as usize]);
        transmute(dst)
    }
}

/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_epi16)
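///
/// # Example
///
/// A minimal sketch (not from Intel's documentation) showing that pair sums
/// from `a` and `b` interleave per 128-bit lane rather than across the
/// whole vector. Assuming AVX2 is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///             let b = _mm256_setzero_si256();
///             let r = _mm256_hadd_epi16(a, b);
///             let mut out = [0i16; 16];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             // Low lane: sums from the low lanes of `a` then `b`; same
///             // pattern for the high lane.
///             assert_eq!(out, [1, 5, 9, 13, 0, 0, 0, 0, 17, 21, 25, 29, 0, 0, 0, 0]);
///         }
///     }
/// }
/// ```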
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
    let a: i16x16 = a.as_i16x16();
    let b: i16x16 = b.as_i16x16();
    unsafe {
        let even: i16x16 = simd_shuffle!(
            a,
            b,
            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
        );
        let odd: i16x16 = simd_shuffle!(
            a,
            b,
            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
        );
        simd_add(even, odd).as_m256i()
    }
}

/// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
    let a: i32x8 = a.as_i32x8();
    let b: i32x8 = b.as_i32x8();
    unsafe {
        let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
        let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
        simd_add(even, odd).as_m256i()
    }
}

/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadds_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phaddsw(a.as_i16x16(), b.as_i16x16())) }
}

/// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
    let a: i16x16 = a.as_i16x16();
    let b: i16x16 = b.as_i16x16();
    unsafe {
        let even: i16x16 = simd_shuffle!(
            a,
            b,
            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
        );
        let odd: i16x16 = simd_shuffle!(
            a,
            b,
            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
        );
        simd_sub(even, odd).as_m256i()
    }
}

/// Horizontally subtract adjacent pairs of 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
    let a: i32x8 = a.as_i32x8();
    let b: i32x8 = b.as_i32x8();
    unsafe {
        let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
        let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
        simd_sub(even, odd).as_m256i()
    }
}

/// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsubs_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phsubsw(a.as_i16x16(), b.as_i16x16())) }
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_epi32)
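///
/// # Example
///
/// A minimal sketch (not from Intel's documentation): gathering every other
/// `i32` from a slice, with `SCALE = 4` because the elements are 4 bytes
/// apart. Assuming AVX2 is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let data: [i32; 8] = [10, 11, 12, 13, 14, 15, 16, 17];
///             let offsets = _mm_setr_epi32(0, 2, 4, 6);
///             let r = _mm_i32gather_epi32::<4>(data.as_ptr(), offsets);
///             let mut out = [0i32; 4];
///             _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///             assert_eq!(out, [10, 12, 14, 16]);
///         }
///     }
/// }
/// ```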
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero: i32x4 = i32x4::ZERO;
    let neg_one: i32x4 = _mm_set1_epi32(-1).as_i32x4();
    let offsets: i32x4 = offsets.as_i32x4();
    let slice: *const i8 = slice as *const i8;
    let r: i32x4 = pgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. Where the highest bit of the
/// corresponding element in `mask` is not set, the value is copied from
/// `src` instead.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_epi32)
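///
/// # Example
///
/// A minimal sketch (not from Intel's documentation): only positions whose
/// mask element has its highest bit set are gathered; the rest are copied
/// from `src`. Assuming AVX2 is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let data: [i32; 4] = [10, 11, 12, 13];
///             let src = _mm_set1_epi32(-1);
///             let offsets = _mm_setr_epi32(0, 1, 2, 3);
///             let mask = _mm_setr_epi32(-1, 0, -1, 0);
///             let r = _mm_mask_i32gather_epi32::<4>(src, data.as_ptr(), offsets, mask);
///             let mut out = [0i32; 4];
///             _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///             assert_eq!(out, [10, -1, 12, -1]);
///         }
///     }
/// }
/// ```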
1023#[inline]
1024#[target_feature(enable = "avx2")]
1025#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
1026#[rustc_legacy_const_generics(4)]
1027#[stable(feature = "simd_x86", since = "1.27.0")]
1028pub unsafe fn _mm_mask_i32gather_epi32<const SCALE: i32>(
1029 src: __m128i,
1030 slice: *const i32,
1031 offsets: __m128i,
1032 mask: __m128i,
1033) -> __m128i {
1034 static_assert_imm8_scale!(SCALE);
1035 let src: i32x4 = src.as_i32x4();
1036 let mask: i32x4 = mask.as_i32x4();
1037 let offsets: i32x4 = offsets.as_i32x4();
1038 let slice: *const i8 = slice as *const i8;
1039 let r: i32x4 = pgatherdd(src, slice, offsets, mask, SCALE as i8);
1040 transmute(src:r)
1041}
1042
1043/// Returns values from `slice` at offsets determined by `offsets * scale`,
1044/// where
1045/// `scale` should be 1, 2, 4 or 8.
1046///
1047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_epi32)
1048#[inline]
1049#[target_feature(enable = "avx2")]
1050#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
1051#[rustc_legacy_const_generics(2)]
1052#[stable(feature = "simd_x86", since = "1.27.0")]
1053pub unsafe fn _mm256_i32gather_epi32<const SCALE: i32>(
1054 slice: *const i32,
1055 offsets: __m256i,
1056) -> __m256i {
1057 static_assert_imm8_scale!(SCALE);
1058 let zero: i32x8 = i32x8::ZERO;
1059 let neg_one: i32x8 = _mm256_set1_epi32(-1).as_i32x8();
1060 let offsets: i32x8 = offsets.as_i32x8();
1061 let slice: *const i8 = slice as *const i8;
1062 let r: i32x8 = vpgatherdd(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1063 transmute(src:r)
1064}
1065
1066/// Returns values from `slice` at offsets determined by `offsets * scale`,
1067/// where
1068/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1069/// that position instead.
1070///
1071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_epi32)
1072#[inline]
1073#[target_feature(enable = "avx2")]
1074#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
1075#[rustc_legacy_const_generics(4)]
1076#[stable(feature = "simd_x86", since = "1.27.0")]
1077pub unsafe fn _mm256_mask_i32gather_epi32<const SCALE: i32>(
1078 src: __m256i,
1079 slice: *const i32,
1080 offsets: __m256i,
1081 mask: __m256i,
1082) -> __m256i {
1083 static_assert_imm8_scale!(SCALE);
1084 let src: i32x8 = src.as_i32x8();
1085 let mask: i32x8 = mask.as_i32x8();
1086 let offsets: i32x8 = offsets.as_i32x8();
1087 let slice: *const i8 = slice as *const i8;
1088 let r: i32x8 = vpgatherdd(src, slice, offsets, mask, SCALE as i8);
1089 transmute(src:r)
1090}
1091
1092/// Returns values from `slice` at offsets determined by `offsets * scale`,
1093/// where
1094/// `scale` should be 1, 2, 4 or 8.
1095///
1096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_ps)
1097#[inline]
1098#[target_feature(enable = "avx2")]
1099#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1100#[rustc_legacy_const_generics(2)]
1101#[stable(feature = "simd_x86", since = "1.27.0")]
1102pub unsafe fn _mm_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
1103 static_assert_imm8_scale!(SCALE);
1104 let zero: __m128 = _mm_setzero_ps();
1105 let neg_one: __m128 = _mm_set1_ps(-1.0);
1106 let offsets: i32x4 = offsets.as_i32x4();
1107 let slice: *const i8 = slice as *const i8;
1108 pgatherdps(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1109}
1110
1111/// Returns values from `slice` at offsets determined by `offsets * scale`,
1112/// where
1113/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1114/// that position instead.
1115///
1116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_ps)
1117#[inline]
1118#[target_feature(enable = "avx2")]
1119#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1120#[rustc_legacy_const_generics(4)]
1121#[stable(feature = "simd_x86", since = "1.27.0")]
1122pub unsafe fn _mm_mask_i32gather_ps<const SCALE: i32>(
1123 src: __m128,
1124 slice: *const f32,
1125 offsets: __m128i,
1126 mask: __m128,
1127) -> __m128 {
1128 static_assert_imm8_scale!(SCALE);
1129 let offsets: i32x4 = offsets.as_i32x4();
1130 let slice: *const i8 = slice as *const i8;
1131 pgatherdps(src, slice, offsets, mask, SCALE as i8)
1132}
1133
1134/// Returns values from `slice` at offsets determined by `offsets * scale`,
1135/// where
1136/// `scale` should be 1, 2, 4 or 8.
1137///
1138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_ps)
1139#[inline]
1140#[target_feature(enable = "avx2")]
1141#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1142#[rustc_legacy_const_generics(2)]
1143#[stable(feature = "simd_x86", since = "1.27.0")]
1144pub unsafe fn _mm256_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m256 {
1145 static_assert_imm8_scale!(SCALE);
1146 let zero: __m256 = _mm256_setzero_ps();
1147 let neg_one: __m256 = _mm256_set1_ps(-1.0);
1148 let offsets: i32x8 = offsets.as_i32x8();
1149 let slice: *const i8 = slice as *const i8;
1150 vpgatherdps(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1151}
1152
1153/// Returns values from `slice` at offsets determined by `offsets * scale`,
1154/// where
1155/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1156/// that position instead.
1157///
1158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_ps)
1159#[inline]
1160#[target_feature(enable = "avx2")]
1161#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1162#[rustc_legacy_const_generics(4)]
1163#[stable(feature = "simd_x86", since = "1.27.0")]
1164pub unsafe fn _mm256_mask_i32gather_ps<const SCALE: i32>(
1165 src: __m256,
1166 slice: *const f32,
1167 offsets: __m256i,
1168 mask: __m256,
1169) -> __m256 {
1170 static_assert_imm8_scale!(SCALE);
1171 let offsets: i32x8 = offsets.as_i32x8();
1172 let slice: *const i8 = slice as *const i8;
1173 vpgatherdps(src, slice, offsets, mask, SCALE as i8)
1174}
1175
1176/// Returns values from `slice` at offsets determined by `offsets * scale`,
1177/// where
1178/// `scale` should be 1, 2, 4 or 8.
1179///
1180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_epi64)
1181#[inline]
1182#[target_feature(enable = "avx2")]
1183#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
1184#[rustc_legacy_const_generics(2)]
1185#[stable(feature = "simd_x86", since = "1.27.0")]
1186pub unsafe fn _mm_i32gather_epi64<const SCALE: i32>(
1187 slice: *const i64,
1188 offsets: __m128i,
1189) -> __m128i {
1190 static_assert_imm8_scale!(SCALE);
1191 let zero: i64x2 = i64x2::ZERO;
1192 let neg_one: i64x2 = _mm_set1_epi64x(-1).as_i64x2();
1193 let offsets: i32x4 = offsets.as_i32x4();
1194 let slice: *const i8 = slice as *const i8;
1195 let r: i64x2 = pgatherdq(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1196 transmute(src:r)
1197}
1198
1199/// Returns values from `slice` at offsets determined by `offsets * scale`,
1200/// where
1201/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1202/// that position instead.
1203///
1204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_epi64)
1205#[inline]
1206#[target_feature(enable = "avx2")]
1207#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
1208#[rustc_legacy_const_generics(4)]
1209#[stable(feature = "simd_x86", since = "1.27.0")]
1210pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>(
1211 src: __m128i,
1212 slice: *const i64,
1213 offsets: __m128i,
1214 mask: __m128i,
1215) -> __m128i {
1216 static_assert_imm8_scale!(SCALE);
1217 let src: i64x2 = src.as_i64x2();
1218 let mask: i64x2 = mask.as_i64x2();
1219 let offsets: i32x4 = offsets.as_i32x4();
1220 let slice: *const i8 = slice as *const i8;
1221 let r: i64x2 = pgatherdq(src, slice, offsets, mask, SCALE as i8);
1222 transmute(src:r)
1223}
1224
1225/// Returns values from `slice` at offsets determined by `offsets * scale`,
1226/// where
1227/// `scale` should be 1, 2, 4 or 8.
1228///
1229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_epi64)
1230#[inline]
1231#[target_feature(enable = "avx2")]
1232#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
1233#[rustc_legacy_const_generics(2)]
1234#[stable(feature = "simd_x86", since = "1.27.0")]
1235pub unsafe fn _mm256_i32gather_epi64<const SCALE: i32>(
1236 slice: *const i64,
1237 offsets: __m128i,
1238) -> __m256i {
1239 static_assert_imm8_scale!(SCALE);
1240 let zero: i64x4 = i64x4::ZERO;
1241 let neg_one: i64x4 = _mm256_set1_epi64x(-1).as_i64x4();
1242 let offsets: i32x4 = offsets.as_i32x4();
1243 let slice: *const i8 = slice as *const i8;
1244 let r: i64x4 = vpgatherdq(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1245 transmute(src:r)
1246}
1247
1248/// Returns values from `slice` at offsets determined by `offsets * scale`,
1249/// where
1250/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1251/// that position instead.
1252///
1253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_epi64)
1254#[inline]
1255#[target_feature(enable = "avx2")]
1256#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
1257#[rustc_legacy_const_generics(4)]
1258#[stable(feature = "simd_x86", since = "1.27.0")]
1259pub unsafe fn _mm256_mask_i32gather_epi64<const SCALE: i32>(
1260 src: __m256i,
1261 slice: *const i64,
1262 offsets: __m128i,
1263 mask: __m256i,
1264) -> __m256i {
1265 static_assert_imm8_scale!(SCALE);
1266 let src: i64x4 = src.as_i64x4();
1267 let mask: i64x4 = mask.as_i64x4();
1268 let offsets: i32x4 = offsets.as_i32x4();
1269 let slice: *const i8 = slice as *const i8;
1270 let r: i64x4 = vpgatherdq(src, slice, offsets, mask, SCALE as i8);
1271 transmute(src:r)
1272}
1273
1274/// Returns values from `slice` at offsets determined by `offsets * scale`,
1275/// where
1276/// `scale` should be 1, 2, 4 or 8.
1277///
1278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_pd)
1279#[inline]
1280#[target_feature(enable = "avx2")]
1281#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
1282#[rustc_legacy_const_generics(2)]
1283#[stable(feature = "simd_x86", since = "1.27.0")]
1284pub unsafe fn _mm_i32gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
1285 static_assert_imm8_scale!(SCALE);
1286 let zero: __m128d = _mm_setzero_pd();
1287 let neg_one: __m128d = _mm_set1_pd(-1.0);
1288 let offsets: i32x4 = offsets.as_i32x4();
1289 let slice: *const i8 = slice as *const i8;
1290 pgatherdpd(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1291}
1292
1293/// Returns values from `slice` at offsets determined by `offsets * scale`,
1294/// where
1295/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1296/// that position instead.
1297///
1298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_pd)
1299#[inline]
1300#[target_feature(enable = "avx2")]
1301#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
1302#[rustc_legacy_const_generics(4)]
1303#[stable(feature = "simd_x86", since = "1.27.0")]
1304pub unsafe fn _mm_mask_i32gather_pd<const SCALE: i32>(
1305 src: __m128d,
1306 slice: *const f64,
1307 offsets: __m128i,
1308 mask: __m128d,
1309) -> __m128d {
1310 static_assert_imm8_scale!(SCALE);
1311 let offsets: i32x4 = offsets.as_i32x4();
1312 let slice: *const i8 = slice as *const i8;
1313 pgatherdpd(src, slice, offsets, mask, SCALE as i8)
1314}
1315
1316/// Returns values from `slice` at offsets determined by `offsets * scale`,
1317/// where
1318/// `scale` should be 1, 2, 4 or 8.
1319///
1320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_pd)
1321#[inline]
1322#[target_feature(enable = "avx2")]
1323#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
1324#[rustc_legacy_const_generics(2)]
1325#[stable(feature = "simd_x86", since = "1.27.0")]
1326pub unsafe fn _mm256_i32gather_pd<const SCALE: i32>(
1327 slice: *const f64,
1328 offsets: __m128i,
1329) -> __m256d {
1330 static_assert_imm8_scale!(SCALE);
1331 let zero: __m256d = _mm256_setzero_pd();
1332 let neg_one: __m256d = _mm256_set1_pd(-1.0);
1333 let offsets: i32x4 = offsets.as_i32x4();
1334 let slice: *const i8 = slice as *const i8;
1335 vpgatherdpd(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1336}
1337
1338/// Returns values from `slice` at offsets determined by `offsets * scale`,
1339/// where
1340/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1341/// that position instead.
1342///
1343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_pd)
1344#[inline]
1345#[target_feature(enable = "avx2")]
1346#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
1347#[rustc_legacy_const_generics(4)]
1348#[stable(feature = "simd_x86", since = "1.27.0")]
1349pub unsafe fn _mm256_mask_i32gather_pd<const SCALE: i32>(
1350 src: __m256d,
1351 slice: *const f64,
1352 offsets: __m128i,
1353 mask: __m256d,
1354) -> __m256d {
1355 static_assert_imm8_scale!(SCALE);
1356 let offsets: i32x4 = offsets.as_i32x4();
1357 let slice: *const i8 = slice as *const i8;
1358 vpgatherdpd(src, slice, offsets, mask, SCALE as i8)
1359}
1360
1361/// Returns values from `slice` at offsets determined by `offsets * scale`,
1362/// where
1363/// `scale` should be 1, 2, 4 or 8.
1364///
1365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_epi32)
1366#[inline]
1367#[target_feature(enable = "avx2")]
1368#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
1369#[rustc_legacy_const_generics(2)]
1370#[stable(feature = "simd_x86", since = "1.27.0")]
1371pub unsafe fn _mm_i64gather_epi32<const SCALE: i32>(
1372 slice: *const i32,
1373 offsets: __m128i,
1374) -> __m128i {
1375 static_assert_imm8_scale!(SCALE);
1376 let zero: i32x4 = i32x4::ZERO;
1377 let neg_one: i32x4 = _mm_set1_epi64x(-1).as_i32x4();
1378 let offsets: i64x2 = offsets.as_i64x2();
1379 let slice: *const i8 = slice as *const i8;
1380 let r: i32x4 = pgatherqd(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1381 transmute(src:r)
1382}
1383
1384/// Returns values from `slice` at offsets determined by `offsets * scale`,
1385/// where
1386/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1387/// that position instead.
1388///
1389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_epi32)
1390#[inline]
1391#[target_feature(enable = "avx2")]
1392#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
1393#[rustc_legacy_const_generics(4)]
1394#[stable(feature = "simd_x86", since = "1.27.0")]
1395pub unsafe fn _mm_mask_i64gather_epi32<const SCALE: i32>(
1396 src: __m128i,
1397 slice: *const i32,
1398 offsets: __m128i,
1399 mask: __m128i,
1400) -> __m128i {
1401 static_assert_imm8_scale!(SCALE);
1402 let src: i32x4 = src.as_i32x4();
1403 let mask: i32x4 = mask.as_i32x4();
1404 let offsets: i64x2 = offsets.as_i64x2();
1405 let slice: *const i8 = slice as *const i8;
1406 let r: i32x4 = pgatherqd(src, slice, offsets, mask, SCALE as i8);
1407 transmute(src:r)
1408}
1409
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
1413///
1414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_epi32)
1415#[inline]
1416#[target_feature(enable = "avx2")]
1417#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
1418#[rustc_legacy_const_generics(2)]
1419#[stable(feature = "simd_x86", since = "1.27.0")]
1420pub unsafe fn _mm256_i64gather_epi32<const SCALE: i32>(
1421 slice: *const i32,
1422 offsets: __m256i,
1423) -> __m128i {
1424 static_assert_imm8_scale!(SCALE);
1425 let zero: i32x4 = i32x4::ZERO;
1426 let neg_one: i32x4 = _mm_set1_epi64x(-1).as_i32x4();
1427 let offsets: i64x4 = offsets.as_i64x4();
1428 let slice: *const i8 = slice as *const i8;
1429 let r: i32x4 = vpgatherqd(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1430 transmute(src:r)
1431}
1432
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value from `src` is
/// copied to that position instead.
1437///
1438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_epi32)
1439#[inline]
1440#[target_feature(enable = "avx2")]
1441#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
1442#[rustc_legacy_const_generics(4)]
1443#[stable(feature = "simd_x86", since = "1.27.0")]
1444pub unsafe fn _mm256_mask_i64gather_epi32<const SCALE: i32>(
1445 src: __m128i,
1446 slice: *const i32,
1447 offsets: __m256i,
1448 mask: __m128i,
1449) -> __m128i {
1450 static_assert_imm8_scale!(SCALE);
1451 let src: i32x4 = src.as_i32x4();
1452 let mask: i32x4 = mask.as_i32x4();
1453 let offsets: i64x4 = offsets.as_i64x4();
1454 let slice: *const i8 = slice as *const i8;
1455 let r: i32x4 = vpgatherqd(src, slice, offsets, mask, SCALE as i8);
1456 transmute(src:r)
1457}
1458
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
1462///
1463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_ps)
1464#[inline]
1465#[target_feature(enable = "avx2")]
1466#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
1467#[rustc_legacy_const_generics(2)]
1468#[stable(feature = "simd_x86", since = "1.27.0")]
1469pub unsafe fn _mm_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
1470 static_assert_imm8_scale!(SCALE);
1471 let zero: __m128 = _mm_setzero_ps();
1472 let neg_one: __m128 = _mm_set1_ps(-1.0);
1473 let offsets: i64x2 = offsets.as_i64x2();
1474 let slice: *const i8 = slice as *const i8;
1475 pgatherqps(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1476}
1477
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value from `src` is
/// copied to that position instead.
1482///
1483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_ps)
1484#[inline]
1485#[target_feature(enable = "avx2")]
1486#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
1487#[rustc_legacy_const_generics(4)]
1488#[stable(feature = "simd_x86", since = "1.27.0")]
1489pub unsafe fn _mm_mask_i64gather_ps<const SCALE: i32>(
1490 src: __m128,
1491 slice: *const f32,
1492 offsets: __m128i,
1493 mask: __m128,
1494) -> __m128 {
1495 static_assert_imm8_scale!(SCALE);
1496 let offsets: i64x2 = offsets.as_i64x2();
1497 let slice: *const i8 = slice as *const i8;
1498 pgatherqps(src, slice, offsets, mask, SCALE as i8)
1499}
1500
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
1504///
1505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_ps)
1506#[inline]
1507#[target_feature(enable = "avx2")]
1508#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
1509#[rustc_legacy_const_generics(2)]
1510#[stable(feature = "simd_x86", since = "1.27.0")]
1511pub unsafe fn _mm256_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m128 {
1512 static_assert_imm8_scale!(SCALE);
1513 let zero: __m128 = _mm_setzero_ps();
1514 let neg_one: __m128 = _mm_set1_ps(-1.0);
1515 let offsets: i64x4 = offsets.as_i64x4();
1516 let slice: *const i8 = slice as *const i8;
1517 vpgatherqps(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1518}
1519
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value from `src` is
/// copied to that position instead.
1524///
1525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_ps)
1526#[inline]
1527#[target_feature(enable = "avx2")]
1528#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
1529#[rustc_legacy_const_generics(4)]
1530#[stable(feature = "simd_x86", since = "1.27.0")]
1531pub unsafe fn _mm256_mask_i64gather_ps<const SCALE: i32>(
1532 src: __m128,
1533 slice: *const f32,
1534 offsets: __m256i,
1535 mask: __m128,
1536) -> __m128 {
1537 static_assert_imm8_scale!(SCALE);
1538 let offsets: i64x4 = offsets.as_i64x4();
1539 let slice: *const i8 = slice as *const i8;
1540 vpgatherqps(src, slice, offsets, mask, SCALE as i8)
1541}
1542
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
1546///
1547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_epi64)
1548#[inline]
1549#[target_feature(enable = "avx2")]
1550#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
1551#[rustc_legacy_const_generics(2)]
1552#[stable(feature = "simd_x86", since = "1.27.0")]
1553pub unsafe fn _mm_i64gather_epi64<const SCALE: i32>(
1554 slice: *const i64,
1555 offsets: __m128i,
1556) -> __m128i {
1557 static_assert_imm8_scale!(SCALE);
1558 let zero: i64x2 = i64x2::ZERO;
1559 let neg_one: i64x2 = _mm_set1_epi64x(-1).as_i64x2();
1560 let slice: *const i8 = slice as *const i8;
1561 let offsets: i64x2 = offsets.as_i64x2();
1562 let r: i64x2 = pgatherqq(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1563 transmute(src:r)
1564}
1565
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value from `src` is
/// copied to that position instead.
1570///
1571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_epi64)
1572#[inline]
1573#[target_feature(enable = "avx2")]
1574#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
1575#[rustc_legacy_const_generics(4)]
1576#[stable(feature = "simd_x86", since = "1.27.0")]
1577pub unsafe fn _mm_mask_i64gather_epi64<const SCALE: i32>(
1578 src: __m128i,
1579 slice: *const i64,
1580 offsets: __m128i,
1581 mask: __m128i,
1582) -> __m128i {
1583 static_assert_imm8_scale!(SCALE);
1584 let src: i64x2 = src.as_i64x2();
1585 let mask: i64x2 = mask.as_i64x2();
1586 let offsets: i64x2 = offsets.as_i64x2();
1587 let slice: *const i8 = slice as *const i8;
1588 let r: i64x2 = pgatherqq(src, slice, offsets, mask, SCALE as i8);
1589 transmute(src:r)
1590}
1591
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
1595///
1596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_epi64)
1597#[inline]
1598#[target_feature(enable = "avx2")]
1599#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
1600#[rustc_legacy_const_generics(2)]
1601#[stable(feature = "simd_x86", since = "1.27.0")]
1602pub unsafe fn _mm256_i64gather_epi64<const SCALE: i32>(
1603 slice: *const i64,
1604 offsets: __m256i,
1605) -> __m256i {
1606 static_assert_imm8_scale!(SCALE);
1607 let zero: i64x4 = i64x4::ZERO;
1608 let neg_one: i64x4 = _mm256_set1_epi64x(-1).as_i64x4();
1609 let slice: *const i8 = slice as *const i8;
1610 let offsets: i64x4 = offsets.as_i64x4();
1611 let r: i64x4 = vpgatherqq(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1612 transmute(src:r)
1613}
1614
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value from `src` is
/// copied to that position instead.
1619///
1620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_epi64)
1621#[inline]
1622#[target_feature(enable = "avx2")]
1623#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
1624#[rustc_legacy_const_generics(4)]
1625#[stable(feature = "simd_x86", since = "1.27.0")]
1626pub unsafe fn _mm256_mask_i64gather_epi64<const SCALE: i32>(
1627 src: __m256i,
1628 slice: *const i64,
1629 offsets: __m256i,
1630 mask: __m256i,
1631) -> __m256i {
1632 static_assert_imm8_scale!(SCALE);
1633 let src: i64x4 = src.as_i64x4();
1634 let mask: i64x4 = mask.as_i64x4();
1635 let offsets: i64x4 = offsets.as_i64x4();
1636 let slice: *const i8 = slice as *const i8;
1637 let r: i64x4 = vpgatherqq(src, slice, offsets, mask, SCALE as i8);
1638 transmute(src:r)
1639}
1640
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
1644///
1645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_pd)
1646#[inline]
1647#[target_feature(enable = "avx2")]
1648#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
1649#[rustc_legacy_const_generics(2)]
1650#[stable(feature = "simd_x86", since = "1.27.0")]
1651pub unsafe fn _mm_i64gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
1652 static_assert_imm8_scale!(SCALE);
1653 let zero: __m128d = _mm_setzero_pd();
1654 let neg_one: __m128d = _mm_set1_pd(-1.0);
1655 let slice: *const i8 = slice as *const i8;
1656 let offsets: i64x2 = offsets.as_i64x2();
1657 pgatherqpd(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1658}
1659
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value from `src` is
/// copied to that position instead.
1664///
1665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_pd)
1666#[inline]
1667#[target_feature(enable = "avx2")]
1668#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
1669#[rustc_legacy_const_generics(4)]
1670#[stable(feature = "simd_x86", since = "1.27.0")]
1671pub unsafe fn _mm_mask_i64gather_pd<const SCALE: i32>(
1672 src: __m128d,
1673 slice: *const f64,
1674 offsets: __m128i,
1675 mask: __m128d,
1676) -> __m128d {
1677 static_assert_imm8_scale!(SCALE);
1678 let slice: *const i8 = slice as *const i8;
1679 let offsets: i64x2 = offsets.as_i64x2();
1680 pgatherqpd(src, slice, offsets, mask, SCALE as i8)
1681}
1682
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
1686///
1687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_pd)
1688#[inline]
1689#[target_feature(enable = "avx2")]
1690#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
1691#[rustc_legacy_const_generics(2)]
1692#[stable(feature = "simd_x86", since = "1.27.0")]
1693pub unsafe fn _mm256_i64gather_pd<const SCALE: i32>(
1694 slice: *const f64,
1695 offsets: __m256i,
1696) -> __m256d {
1697 static_assert_imm8_scale!(SCALE);
1698 let zero: __m256d = _mm256_setzero_pd();
1699 let neg_one: __m256d = _mm256_set1_pd(-1.0);
1700 let slice: *const i8 = slice as *const i8;
1701 let offsets: i64x4 = offsets.as_i64x4();
1702 vpgatherqpd(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1703}
1704
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value from `src` is
/// copied to that position instead.
1709///
1710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_pd)
1711#[inline]
1712#[target_feature(enable = "avx2")]
1713#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
1714#[rustc_legacy_const_generics(4)]
1715#[stable(feature = "simd_x86", since = "1.27.0")]
1716pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>(
1717 src: __m256d,
1718 slice: *const f64,
1719 offsets: __m256i,
1720 mask: __m256d,
1721) -> __m256d {
1722 static_assert_imm8_scale!(SCALE);
1723 let slice: *const i8 = slice as *const i8;
1724 let offsets: i64x4 = offsets.as_i64x4();
1725 vpgatherqpd(src, slice, offsets, mask, SCALE as i8)
1726}
1727
/// Copies `a` to `dst`, then inserts 128 bits (of integer data) from `b` at the
1729/// location specified by `IMM1`.
1730///
1731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti128_si256)
1732#[inline]
1733#[target_feature(enable = "avx2")]
1734#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
1735#[rustc_legacy_const_generics(2)]
1736#[stable(feature = "simd_x86", since = "1.27.0")]
1737pub fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
1738 static_assert_uimm_bits!(IMM1, 1);
1739 unsafe {
1740 let a: i64x4 = a.as_i64x4();
1741 let b: i64x4 = _mm256_castsi128_si256(b).as_i64x4();
1742 let dst: i64x4 = simd_shuffle!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]);
1743 transmute(src:dst)
1744 }
1745}
1746
1747/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally adds adjacent pairs
1749/// of intermediate 32-bit integers.
1750///
1751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd_epi16)
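///
/// For illustration (example values only, not part of Intel's documentation):
/// each 32-bit result lane is `a[2 * i] * b[2 * i] + a[2 * i + 1] * b[2 * i + 1]`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(3);
/// let b = _mm256_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
/// // Each adjacent pair of products is summed: 3*1 + 3*2 = 9, 3*3 + 3*4 = 21, ...
/// let r = _mm256_madd_epi16(a, b);
/// let expected = _mm256_setr_epi32(9, 21, 33, 45, 57, 69, 81, 93);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```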
1752#[inline]
1753#[target_feature(enable = "avx2")]
1754#[cfg_attr(test, assert_instr(vpmaddwd))]
1755#[stable(feature = "simd_x86", since = "1.27.0")]
1756pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
1757 // It's a trick used in the Adler-32 algorithm to perform a widening addition.
1758 //
1759 // ```rust
1760 // #[target_feature(enable = "avx2")]
1761 // unsafe fn widening_add(mad: __m256i) -> __m256i {
1762 // _mm256_madd_epi16(mad, _mm256_set1_epi16(1))
1763 // }
1764 // ```
1765 //
1766 // If we implement this using generic vector intrinsics, the optimizer
1767 // will eliminate this pattern, and `vpmaddwd` will no longer be emitted.
1768 // For this reason, we use x86 intrinsics.
1769 unsafe { transmute(src:pmaddwd(a.as_i16x16(), b.as_i16x16())) }
1770}
1771
1772/// Vertically multiplies each unsigned 8-bit integer from `a` with the
1773/// corresponding signed 8-bit integer from `b`, producing intermediate
/// signed 16-bit integers. Horizontally adds adjacent pairs of intermediate
/// signed 16-bit integers using saturating addition
1776///
1777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maddubs_epi16)
1778#[inline]
1779#[target_feature(enable = "avx2")]
1780#[cfg_attr(test, assert_instr(vpmaddubsw))]
1781#[stable(feature = "simd_x86", since = "1.27.0")]
1782pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
1783 unsafe { transmute(src:pmaddubsw(a.as_u8x32(), b.as_i8x32())) }
1784}
1785
/// Loads packed 32-bit integers from memory pointed to by `mem_addr` using `mask`
1787/// (elements are zeroed out when the highest bit is not set in the
1788/// corresponding element).
1789///
1790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_epi32)
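///
/// A small usage sketch (example values only, not part of Intel's
/// documentation): lanes whose mask element has its highest bit set are read
/// from memory, the rest become zero.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let data = [10_i32, 20, 30, 40];
/// // Load lanes 0 and 3 only; lanes 1 and 2 are zeroed.
/// let mask = _mm_setr_epi32(-1, 0, 0, -1);
/// let r = _mm_maskload_epi32(data.as_ptr(), mask);
/// let expected = _mm_setr_epi32(10, 0, 0, 40);
/// assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(r, expected)), 0xffff);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```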
1791#[inline]
1792#[target_feature(enable = "avx2")]
1793#[cfg_attr(test, assert_instr(vpmaskmovd))]
1794#[stable(feature = "simd_x86", since = "1.27.0")]
1795pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
1796 let mask: i32x4 = simd_shr(lhs:mask.as_i32x4(), rhs:i32x4::splat(31));
1797 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x4::ZERO).as_m128i()
1798}
1799
/// Loads packed 32-bit integers from memory pointed to by `mem_addr` using `mask`
1801/// (elements are zeroed out when the highest bit is not set in the
1802/// corresponding element).
1803///
1804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_epi32)
1805#[inline]
1806#[target_feature(enable = "avx2")]
1807#[cfg_attr(test, assert_instr(vpmaskmovd))]
1808#[stable(feature = "simd_x86", since = "1.27.0")]
1809pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
1810 let mask: i32x8 = simd_shr(lhs:mask.as_i32x8(), rhs:i32x8::splat(31));
1811 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x8::ZERO).as_m256i()
1812}
1813
/// Loads packed 64-bit integers from memory pointed to by `mem_addr` using `mask`
1815/// (elements are zeroed out when the highest bit is not set in the
1816/// corresponding element).
1817///
1818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_epi64)
1819#[inline]
1820#[target_feature(enable = "avx2")]
1821#[cfg_attr(test, assert_instr(vpmaskmovq))]
1822#[stable(feature = "simd_x86", since = "1.27.0")]
1823pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
1824 let mask: i64x2 = simd_shr(lhs:mask.as_i64x2(), rhs:i64x2::splat(63));
1825 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x2::ZERO).as_m128i()
1826}
1827
/// Loads packed 64-bit integers from memory pointed to by `mem_addr` using `mask`
1829/// (elements are zeroed out when the highest bit is not set in the
1830/// corresponding element).
1831///
1832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_epi64)
1833#[inline]
1834#[target_feature(enable = "avx2")]
1835#[cfg_attr(test, assert_instr(vpmaskmovq))]
1836#[stable(feature = "simd_x86", since = "1.27.0")]
1837pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
1838 let mask: i64x4 = simd_shr(lhs:mask.as_i64x4(), rhs:i64x4::splat(63));
1839 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x4::ZERO).as_m256i()
1840}
1841
/// Stores packed 32-bit integers from `a` into memory pointed to by `mem_addr`
1843/// using `mask` (elements are not stored when the highest bit is not set
1844/// in the corresponding element).
1845///
1846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_epi32)
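///
/// A small usage sketch (example values only, not part of Intel's
/// documentation): lanes whose mask element has its highest bit set are
/// written, the others leave memory untouched.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let mut data = [0_i32; 4];
/// let a = _mm_setr_epi32(10, 20, 30, 40);
/// // Store only lanes 1 and 2.
/// let mask = _mm_setr_epi32(0, -1, -1, 0);
/// _mm_maskstore_epi32(data.as_mut_ptr(), mask, a);
/// assert_eq!(data, [0, 20, 30, 0]);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```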
1847#[inline]
1848#[target_feature(enable = "avx2")]
1849#[cfg_attr(test, assert_instr(vpmaskmovd))]
1850#[stable(feature = "simd_x86", since = "1.27.0")]
1851pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
1852 let mask: i32x4 = simd_shr(lhs:mask.as_i32x4(), rhs:i32x4::splat(31));
1853 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4())
1854}
1855
/// Stores packed 32-bit integers from `a` into memory pointed to by `mem_addr`
1857/// using `mask` (elements are not stored when the highest bit is not set
1858/// in the corresponding element).
1859///
1860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_epi32)
1861#[inline]
1862#[target_feature(enable = "avx2")]
1863#[cfg_attr(test, assert_instr(vpmaskmovd))]
1864#[stable(feature = "simd_x86", since = "1.27.0")]
1865pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
1866 let mask: i32x8 = simd_shr(lhs:mask.as_i32x8(), rhs:i32x8::splat(31));
1867 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8())
1868}
1869
/// Stores packed 64-bit integers from `a` into memory pointed to by `mem_addr`
1871/// using `mask` (elements are not stored when the highest bit is not set
1872/// in the corresponding element).
1873///
1874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_epi64)
1875#[inline]
1876#[target_feature(enable = "avx2")]
1877#[cfg_attr(test, assert_instr(vpmaskmovq))]
1878#[stable(feature = "simd_x86", since = "1.27.0")]
1879pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
1880 let mask: i64x2 = simd_shr(lhs:mask.as_i64x2(), rhs:i64x2::splat(63));
1881 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2())
1882}
1883
/// Stores packed 64-bit integers from `a` into memory pointed to by `mem_addr`
1885/// using `mask` (elements are not stored when the highest bit is not set
1886/// in the corresponding element).
1887///
1888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_epi64)
1889#[inline]
1890#[target_feature(enable = "avx2")]
1891#[cfg_attr(test, assert_instr(vpmaskmovq))]
1892#[stable(feature = "simd_x86", since = "1.27.0")]
1893pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
1894 let mask: i64x4 = simd_shr(lhs:mask.as_i64x4(), rhs:i64x4::splat(63));
1895 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4())
1896}
1897
1898/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
1899/// maximum values.
1900///
1901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi16)
1902#[inline]
1903#[target_feature(enable = "avx2")]
1904#[cfg_attr(test, assert_instr(vpmaxsw))]
1905#[stable(feature = "simd_x86", since = "1.27.0")]
1906pub fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
1907 unsafe {
1908 let a: i16x16 = a.as_i16x16();
1909 let b: i16x16 = b.as_i16x16();
1910 transmute(src:simd_select::<i16x16, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
1911 }
1912}
1913
1914/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
1915/// maximum values.
1916///
1917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi32)
1918#[inline]
1919#[target_feature(enable = "avx2")]
1920#[cfg_attr(test, assert_instr(vpmaxsd))]
1921#[stable(feature = "simd_x86", since = "1.27.0")]
1922pub fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
1923 unsafe {
1924 let a: i32x8 = a.as_i32x8();
1925 let b: i32x8 = b.as_i32x8();
1926 transmute(src:simd_select::<i32x8, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
1927 }
1928}
1929
1930/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
1931/// maximum values.
1932///
1933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi8)
1934#[inline]
1935#[target_feature(enable = "avx2")]
1936#[cfg_attr(test, assert_instr(vpmaxsb))]
1937#[stable(feature = "simd_x86", since = "1.27.0")]
1938pub fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
1939 unsafe {
1940 let a: i8x32 = a.as_i8x32();
1941 let b: i8x32 = b.as_i8x32();
1942 transmute(src:simd_select::<i8x32, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
1943 }
1944}
1945
1946/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
1947/// the packed maximum values.
1948///
1949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu16)
1950#[inline]
1951#[target_feature(enable = "avx2")]
1952#[cfg_attr(test, assert_instr(vpmaxuw))]
1953#[stable(feature = "simd_x86", since = "1.27.0")]
1954pub fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
1955 unsafe {
1956 let a: u16x16 = a.as_u16x16();
1957 let b: u16x16 = b.as_u16x16();
1958 transmute(src:simd_select::<i16x16, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
1959 }
1960}
1961
1962/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
1963/// the packed maximum values.
1964///
1965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu32)
1966#[inline]
1967#[target_feature(enable = "avx2")]
1968#[cfg_attr(test, assert_instr(vpmaxud))]
1969#[stable(feature = "simd_x86", since = "1.27.0")]
1970pub fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
1971 unsafe {
1972 let a: u32x8 = a.as_u32x8();
1973 let b: u32x8 = b.as_u32x8();
1974 transmute(src:simd_select::<i32x8, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
1975 }
1976}
1977
1978/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
1979/// the packed maximum values.
1980///
1981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu8)
1982#[inline]
1983#[target_feature(enable = "avx2")]
1984#[cfg_attr(test, assert_instr(vpmaxub))]
1985#[stable(feature = "simd_x86", since = "1.27.0")]
1986pub fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
1987 unsafe {
1988 let a: u8x32 = a.as_u8x32();
1989 let b: u8x32 = b.as_u8x32();
1990 transmute(src:simd_select::<i8x32, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
1991 }
1992}
1993
1994/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
1995/// minimum values.
1996///
1997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi16)
1998#[inline]
1999#[target_feature(enable = "avx2")]
2000#[cfg_attr(test, assert_instr(vpminsw))]
2001#[stable(feature = "simd_x86", since = "1.27.0")]
2002pub fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
2003 unsafe {
2004 let a: i16x16 = a.as_i16x16();
2005 let b: i16x16 = b.as_i16x16();
2006 transmute(src:simd_select::<i16x16, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2007 }
2008}
2009
2010/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
2011/// minimum values.
2012///
2013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi32)
2014#[inline]
2015#[target_feature(enable = "avx2")]
2016#[cfg_attr(test, assert_instr(vpminsd))]
2017#[stable(feature = "simd_x86", since = "1.27.0")]
2018pub fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
2019 unsafe {
2020 let a: i32x8 = a.as_i32x8();
2021 let b: i32x8 = b.as_i32x8();
2022 transmute(src:simd_select::<i32x8, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2023 }
2024}
2025
2026/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
2027/// minimum values.
2028///
2029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi8)
2030#[inline]
2031#[target_feature(enable = "avx2")]
2032#[cfg_attr(test, assert_instr(vpminsb))]
2033#[stable(feature = "simd_x86", since = "1.27.0")]
2034pub fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
2035 unsafe {
2036 let a: i8x32 = a.as_i8x32();
2037 let b: i8x32 = b.as_i8x32();
2038 transmute(src:simd_select::<i8x32, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2039 }
2040}
2041
2042/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
2043/// the packed minimum values.
2044///
2045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu16)
2046#[inline]
2047#[target_feature(enable = "avx2")]
2048#[cfg_attr(test, assert_instr(vpminuw))]
2049#[stable(feature = "simd_x86", since = "1.27.0")]
2050pub fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
2051 unsafe {
2052 let a: u16x16 = a.as_u16x16();
2053 let b: u16x16 = b.as_u16x16();
2054 transmute(src:simd_select::<i16x16, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2055 }
2056}
2057
2058/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
2059/// the packed minimum values.
2060///
2061/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu32)
2062#[inline]
2063#[target_feature(enable = "avx2")]
2064#[cfg_attr(test, assert_instr(vpminud))]
2065#[stable(feature = "simd_x86", since = "1.27.0")]
2066pub fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
2067 unsafe {
2068 let a: u32x8 = a.as_u32x8();
2069 let b: u32x8 = b.as_u32x8();
2070 transmute(src:simd_select::<i32x8, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2071 }
2072}
2073
2074/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
2075/// the packed minimum values.
2076///
2077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu8)
2078#[inline]
2079#[target_feature(enable = "avx2")]
2080#[cfg_attr(test, assert_instr(vpminub))]
2081#[stable(feature = "simd_x86", since = "1.27.0")]
2082pub fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
2083 unsafe {
2084 let a: u8x32 = a.as_u8x32();
2085 let b: u8x32 = b.as_u8x32();
2086 transmute(src:simd_select::<i8x32, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2087 }
2088}
2089
/// Creates a mask from the most significant bit of each 8-bit element in `a`,
/// and returns the result.
2092///
2093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_epi8)
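///
/// For illustration (example values only, not part of Intel's documentation):
/// bit `i` of the result is the most significant bit of byte `i` of `a`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // Only bytes 0 and 31 have their most significant bit set.
/// let mut bytes = [0_i8; 32];
/// bytes[0] = -1;
/// bytes[31] = -128;
/// let a = _mm256_loadu_si256(bytes.as_ptr() as *const __m256i);
/// let m = _mm256_movemask_epi8(a);
/// assert_eq!(m as u32, 0x8000_0001);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```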
2094#[inline]
2095#[target_feature(enable = "avx2")]
2096#[cfg_attr(test, assert_instr(vpmovmskb))]
2097#[stable(feature = "simd_x86", since = "1.27.0")]
2098pub fn _mm256_movemask_epi8(a: __m256i) -> i32 {
2099 unsafe {
2100 let z: i8x32 = i8x32::ZERO;
2101 let m: i8x32 = simd_lt(x:a.as_i8x32(), y:z);
2102 simd_bitmask::<_, u32>(m) as i32
2103 }
2104}
2105
2106/// Computes the sum of absolute differences (SADs) of quadruplets of unsigned
2107/// 8-bit integers in `a` compared to those in `b`, and stores the 16-bit
2108/// results in dst. Eight SADs are performed for each 128-bit lane using one
2109/// quadruplet from `b` and eight quadruplets from `a`. One quadruplet is
/// selected from `b` starting at the offset specified in `imm8`. Eight
2111/// quadruplets are formed from sequential 8-bit integers selected from `a`
2112/// starting at the offset specified in `imm8`.
2113///
2114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mpsadbw_epu8)
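///
/// As a rough sketch of the per-lane behavior (the helper name
/// `mpsadbw_lane` and its `imm3` parameter, the three control bits that
/// apply to one 128-bit lane, are illustrative and not part of the API):
///
/// ```
/// fn mpsadbw_lane(a: &[u8; 16], b: &[u8; 16], imm3: u8) -> [u16; 8] {
///     // One quadruplet of `b` and eight overlapping quadruplets of `a`.
///     let b_offset = (imm3 & 0b11) as usize * 4;
///     let a_offset = ((imm3 >> 2) & 0b1) as usize * 4;
///     let mut r = [0u16; 8];
///     for j in 0..8 {
///         for k in 0..4 {
///             let d = a[a_offset + j + k] as i16 - b[b_offset + k] as i16;
///             r[j] += d.unsigned_abs();
///         }
///     }
///     r
/// }
/// ```
///
/// For `_mm256_mpsadbw_epu8` itself, bits `2:0` of `imm8` control the low
/// 128-bit lane and bits `5:3` control the high lane.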
2115#[inline]
2116#[target_feature(enable = "avx2")]
2117#[cfg_attr(test, assert_instr(vmpsadbw, IMM8 = 0))]
2118#[rustc_legacy_const_generics(2)]
2119#[stable(feature = "simd_x86", since = "1.27.0")]
2120pub fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2121 static_assert_uimm_bits!(IMM8, 8);
2122 unsafe { transmute(src:mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8 as i8)) }
2123}
2124
2125/// Multiplies the low 32-bit integers from each packed 64-bit element in
2126/// `a` and `b`
2127///
2128/// Returns the 64-bit results.
2129///
2130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_epi32)
2131#[inline]
2132#[target_feature(enable = "avx2")]
2133#[cfg_attr(test, assert_instr(vpmuldq))]
2134#[stable(feature = "simd_x86", since = "1.27.0")]
2135pub fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
2136 unsafe {
2137 let a: i64x4 = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
2138 let b: i64x4 = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
2139 transmute(src:simd_mul(x:a, y:b))
2140 }
2141}
2142
2143/// Multiplies the low unsigned 32-bit integers from each packed 64-bit
2144/// element in `a` and `b`
2145///
2146/// Returns the unsigned 64-bit results.
2147///
2148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_epu32)
2149#[inline]
2150#[target_feature(enable = "avx2")]
2151#[cfg_attr(test, assert_instr(vpmuludq))]
2152#[stable(feature = "simd_x86", since = "1.27.0")]
2153pub fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
2154 unsafe {
2155 let a: u64x4 = a.as_u64x4();
2156 let b: u64x4 = b.as_u64x4();
2157 let mask: u64x4 = u64x4::splat(u32::MAX.into());
2158 transmute(src:simd_mul(x:simd_and(a, mask), y:simd_and(x:b, y:mask)))
2159 }
2160}
2161
2162/// Multiplies the packed 16-bit integers in `a` and `b`, producing
2163/// intermediate 32-bit integers and returning the high 16 bits of the
2164/// intermediate integers.
2165///
2166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhi_epi16)
2167#[inline]
2168#[target_feature(enable = "avx2")]
2169#[cfg_attr(test, assert_instr(vpmulhw))]
2170#[stable(feature = "simd_x86", since = "1.27.0")]
2171pub fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
2172 unsafe {
2173 let a: i32x16 = simd_cast::<_, i32x16>(a.as_i16x16());
2174 let b: i32x16 = simd_cast::<_, i32x16>(b.as_i16x16());
2175 let r: i32x16 = simd_shr(lhs:simd_mul(a, b), rhs:i32x16::splat(16));
2176 transmute(src:simd_cast::<i32x16, i16x16>(r))
2177 }
2178}
2179
2180/// Multiplies the packed unsigned 16-bit integers in `a` and `b`, producing
2181/// intermediate 32-bit integers and returning the high 16 bits of the
2182/// intermediate integers.
2183///
2184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhi_epu16)
2185#[inline]
2186#[target_feature(enable = "avx2")]
2187#[cfg_attr(test, assert_instr(vpmulhuw))]
2188#[stable(feature = "simd_x86", since = "1.27.0")]
2189pub fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
2190 unsafe {
2191 let a: u32x16 = simd_cast::<_, u32x16>(a.as_u16x16());
2192 let b: u32x16 = simd_cast::<_, u32x16>(b.as_u16x16());
2193 let r: u32x16 = simd_shr(lhs:simd_mul(a, b), rhs:u32x16::splat(16));
2194 transmute(src:simd_cast::<u32x16, u16x16>(r))
2195 }
2196}
2197
2198/// Multiplies the packed 16-bit integers in `a` and `b`, producing
2199/// intermediate 32-bit integers, and returns the low 16 bits of the
2200/// intermediate integers
2201///
2202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi16)
2203#[inline]
2204#[target_feature(enable = "avx2")]
2205#[cfg_attr(test, assert_instr(vpmullw))]
2206#[stable(feature = "simd_x86", since = "1.27.0")]
2207pub fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
2208 unsafe { transmute(src:simd_mul(x:a.as_i16x16(), y:b.as_i16x16())) }
2209}
2210
2211/// Multiplies the packed 32-bit integers in `a` and `b`, producing
2212/// intermediate 64-bit integers, and returns the low 32 bits of the
2213/// intermediate integers
2214///
2215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi32)
2216#[inline]
2217#[target_feature(enable = "avx2")]
2218#[cfg_attr(test, assert_instr(vpmulld))]
2219#[stable(feature = "simd_x86", since = "1.27.0")]
2220pub fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
2221 unsafe { transmute(src:simd_mul(x:a.as_i32x8(), y:b.as_i32x8())) }
2222}
2223
2224/// Multiplies packed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Truncates each intermediate
/// integer to the 18 most significant bits, rounds by adding 1, and
/// returns bits `[16:1]`.
2228///
2229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhrs_epi16)
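///
/// For a single pair of elements, the computation can be modeled as follows
/// (the helper `mulhrs` is only an illustrative sketch, not the
/// implementation):
///
/// ```
/// fn mulhrs(a: i16, b: i16) -> i16 {
///     // Widen, keep the top 18 bits of the 32-bit product, round, take bits 16:1.
///     let product = a as i32 * b as i32;
///     (((product >> 14) + 1) >> 1) as i16
/// }
///
/// // In Q15 fixed point, 0.5 * 0.5 rounds to 0.25.
/// assert_eq!(mulhrs(16384, 16384), 8192);
/// ```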
2230#[inline]
2231#[target_feature(enable = "avx2")]
2232#[cfg_attr(test, assert_instr(vpmulhrsw))]
2233#[stable(feature = "simd_x86", since = "1.27.0")]
2234pub fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
2235 unsafe { transmute(src:pmulhrsw(a.as_i16x16(), b.as_i16x16())) }
2236}
2237
2238/// Computes the bitwise OR of 256 bits (representing integer data) in `a`
2239/// and `b`
2240///
2241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_si256)
2242#[inline]
2243#[target_feature(enable = "avx2")]
2244#[cfg_attr(test, assert_instr(vorps))]
2245#[stable(feature = "simd_x86", since = "1.27.0")]
2246pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
2247 unsafe { transmute(src:simd_or(x:a.as_i32x8(), y:b.as_i32x8())) }
2248}
2249
2250/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
2251/// using signed saturation
2252///
2253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi16)
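///
/// An illustrative sketch (example values only, not part of Intel's
/// documentation): within each 128-bit lane, the eight saturated values
/// from `a` come first, followed by the eight saturated values from `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(1000); // saturates to 127
/// let b = _mm256_set1_epi16(-1000); // saturates to -128
/// let r = _mm256_packs_epi16(a, b);
/// let expected = _mm256_setr_epi8(
///     127, 127, 127, 127, 127, 127, 127, 127,
///     -128, -128, -128, -128, -128, -128, -128, -128,
///     127, 127, 127, 127, 127, 127, 127, 127,
///     -128, -128, -128, -128, -128, -128, -128, -128,
/// );
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```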
2254#[inline]
2255#[target_feature(enable = "avx2")]
2256#[cfg_attr(test, assert_instr(vpacksswb))]
2257#[stable(feature = "simd_x86", since = "1.27.0")]
2258pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
2259 unsafe { transmute(src:packsswb(a.as_i16x16(), b.as_i16x16())) }
2260}
2261
2262/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
2263/// using signed saturation
2264///
2265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi32)
2266#[inline]
2267#[target_feature(enable = "avx2")]
2268#[cfg_attr(test, assert_instr(vpackssdw))]
2269#[stable(feature = "simd_x86", since = "1.27.0")]
2270pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
2271 unsafe { transmute(src:packssdw(a.as_i32x8(), b.as_i32x8())) }
2272}
2273
2274/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
2275/// using unsigned saturation
2276///
2277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi16)
2278#[inline]
2279#[target_feature(enable = "avx2")]
2280#[cfg_attr(test, assert_instr(vpackuswb))]
2281#[stable(feature = "simd_x86", since = "1.27.0")]
2282pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
2283 unsafe { transmute(src:packuswb(a.as_i16x16(), b.as_i16x16())) }
2284}
2285
2286/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
2287/// using unsigned saturation
2288///
2289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi32)
2290#[inline]
2291#[target_feature(enable = "avx2")]
2292#[cfg_attr(test, assert_instr(vpackusdw))]
2293#[stable(feature = "simd_x86", since = "1.27.0")]
2294pub fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
2295 unsafe { transmute(src:packusdw(a.as_i32x8(), b.as_i32x8())) }
2296}
2297
2298/// Permutes packed 32-bit integers from `a` according to the content of `b`.
2299///
/// The lowest 3 bits of each integer of `b` are used as addresses into the 8
2301/// integers of `a`.
2302///
2303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_epi32)
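///
/// A small usage sketch (example values only, not part of Intel's
/// documentation); note that, unlike the in-lane shuffles, elements may move
/// across the two 128-bit lanes:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(10, 11, 12, 13, 14, 15, 16, 17);
/// // Reverse the eight elements.
/// let idx = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
/// let r = _mm256_permutevar8x32_epi32(a, idx);
/// let expected = _mm256_setr_epi32(17, 16, 15, 14, 13, 12, 11, 10);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi32(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```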
2304#[inline]
2305#[target_feature(enable = "avx2")]
2306#[cfg_attr(test, assert_instr(vpermps))]
2307#[stable(feature = "simd_x86", since = "1.27.0")]
2308pub fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
2309 unsafe { transmute(src:permd(a.as_u32x8(), b.as_u32x8())) }
2310}
2311
2312/// Permutes 64-bit integers from `a` using control mask `imm8`.
2313///
2314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute4x64_epi64)
2315#[inline]
2316#[target_feature(enable = "avx2")]
2317#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 9))]
2318#[rustc_legacy_const_generics(1)]
2319#[stable(feature = "simd_x86", since = "1.27.0")]
2320pub fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2321 static_assert_uimm_bits!(IMM8, 8);
2322 unsafe {
2323 let zero: i64x4 = i64x4::ZERO;
2324 let r: i64x4 = simd_shuffle!(
2325 a.as_i64x4(),
2326 zero,
2327 [
2328 IMM8 as u32 & 0b11,
2329 (IMM8 as u32 >> 2) & 0b11,
2330 (IMM8 as u32 >> 4) & 0b11,
2331 (IMM8 as u32 >> 6) & 0b11,
2332 ],
2333 );
2334 transmute(src:r)
2335 }
2336}
2337
2338/// Shuffles 128-bits of integer data selected by `imm8` from `a` and `b`.
2339///
2340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2x128_si256)
2341#[inline]
2342#[target_feature(enable = "avx2")]
2343#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 9))]
2344#[rustc_legacy_const_generics(2)]
2345#[stable(feature = "simd_x86", since = "1.27.0")]
2346pub fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2347 static_assert_uimm_bits!(IMM8, 8);
2348 _mm256_permute2f128_si256::<IMM8>(a, b)
2349}
2350
2351/// Shuffles 64-bit floating-point elements in `a` across lanes using the
2352/// control in `imm8`.
2353///
2354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute4x64_pd)
2355#[inline]
2356#[target_feature(enable = "avx2")]
2357#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 1))]
2358#[rustc_legacy_const_generics(1)]
2359#[stable(feature = "simd_x86", since = "1.27.0")]
2360pub fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
2361 static_assert_uimm_bits!(IMM8, 8);
2362 unsafe {
2363 simd_shuffle!(
2364 a,
2365 _mm256_undefined_pd(),
2366 [
2367 IMM8 as u32 & 0b11,
2368 (IMM8 as u32 >> 2) & 0b11,
2369 (IMM8 as u32 >> 4) & 0b11,
2370 (IMM8 as u32 >> 6) & 0b11,
2371 ],
2372 )
2373 }
2374}
2375
2376/// Shuffles eight 32-bit floating-point elements in `a` across lanes using
2377/// the corresponding 32-bit integer index in `idx`.
2378///
2379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_ps)
2380#[inline]
2381#[target_feature(enable = "avx2")]
2382#[cfg_attr(test, assert_instr(vpermps))]
2383#[stable(feature = "simd_x86", since = "1.27.0")]
2384pub fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 {
2385 unsafe { permps(a, b:idx.as_i32x8()) }
2386}
2387
2388/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sums each consecutive 8 differences to
/// produce four unsigned 16-bit integers, and packs these unsigned 16-bit
/// integers in the low 16 bits of each 64-bit element of the result
2392///
2393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sad_epu8)
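///
/// For illustration (example values only, not part of Intel's documentation):
/// every group of eight byte differences collapses into one 16-bit sum per
/// 64-bit element.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi8(9);
/// let b = _mm256_set1_epi8(5);
/// // Each group of eight |9 - 5| differences sums to 32.
/// let r = _mm256_sad_epu8(a, b);
/// let expected = _mm256_set1_epi64x(32);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi64(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```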
2394#[inline]
2395#[target_feature(enable = "avx2")]
2396#[cfg_attr(test, assert_instr(vpsadbw))]
2397#[stable(feature = "simd_x86", since = "1.27.0")]
2398pub fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i {
2399 unsafe { transmute(src:psadbw(a.as_u8x32(), b.as_u8x32())) }
2400}
2401
2402/// Shuffles bytes from `a` according to the content of `b`.
2403///
/// For each of the 128-bit low and high halves of the vectors, the low
/// 4 bits of each byte of `b` are used as addresses into the respective
2406/// low or high 16 bytes of `a`. That is, the halves are shuffled separately.
2407///
/// In addition, if the most significant bit of a byte of `b` is set, the
2409/// respective destination byte is set to 0.
2410///
2411/// Picturing `a` and `b` as `[u8; 32]`, `_mm256_shuffle_epi8` is logically
2412/// equivalent to:
2413///
2414/// ```
2415/// fn mm256_shuffle_epi8(a: [u8; 32], b: [u8; 32]) -> [u8; 32] {
2416/// let mut r = [0; 32];
2417/// for i in 0..16 {
2418/// // if the most significant bit of b is set,
2419/// // then the destination byte is set to 0.
2420/// if b[i] & 0x80 == 0u8 {
2421/// r[i] = a[(b[i] % 16) as usize];
2422/// }
2423/// if b[i + 16] & 0x80 == 0u8 {
2424/// r[i + 16] = a[(b[i + 16] % 16 + 16) as usize];
2425/// }
2426/// }
2427/// r
2428/// }
2429/// ```
2430///
2431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_epi8)
2432#[inline]
2433#[target_feature(enable = "avx2")]
2434#[cfg_attr(test, assert_instr(vpshufb))]
2435#[stable(feature = "simd_x86", since = "1.27.0")]
2436pub fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
2437 unsafe { transmute(src:pshufb(a.as_u8x32(), b.as_u8x32())) }
2438}
2439
2440/// Shuffles 32-bit integers in 128-bit lanes of `a` using the control in
2441/// `imm8`.
2442///
2443/// ```rust
2444/// #[cfg(target_arch = "x86")]
2445/// use std::arch::x86::*;
2446/// #[cfg(target_arch = "x86_64")]
2447/// use std::arch::x86_64::*;
2448///
2449/// # fn main() {
2450/// # if is_x86_feature_detected!("avx2") {
2451/// # #[target_feature(enable = "avx2")]
2452/// # unsafe fn worker() {
2453/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
2454///
2455/// let c1 = _mm256_shuffle_epi32(a, 0b00_11_10_01);
2456/// let c2 = _mm256_shuffle_epi32(a, 0b01_00_10_11);
2457///
2458/// let expected1 = _mm256_setr_epi32(1, 2, 3, 0, 5, 6, 7, 4);
2459/// let expected2 = _mm256_setr_epi32(3, 2, 0, 1, 7, 6, 4, 5);
2460///
2461/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c1, expected1)), !0);
2462/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c2, expected2)), !0);
2463/// # }
2464/// # unsafe { worker(); }
2465/// # }
2466/// # }
2467/// ```
2468///
2469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_epi32)
2470#[inline]
2471#[target_feature(enable = "avx2")]
2472#[cfg_attr(test, assert_instr(vshufps, MASK = 9))]
2473#[rustc_legacy_const_generics(1)]
2474#[stable(feature = "simd_x86", since = "1.27.0")]
2475pub fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
2476 static_assert_uimm_bits!(MASK, 8);
2477 unsafe {
2478 let r: i32x8 = simd_shuffle!(
2479 a.as_i32x8(),
2480 a.as_i32x8(),
2481 [
2482 MASK as u32 & 0b11,
2483 (MASK as u32 >> 2) & 0b11,
2484 (MASK as u32 >> 4) & 0b11,
2485 (MASK as u32 >> 6) & 0b11,
2486 (MASK as u32 & 0b11) + 4,
2487 ((MASK as u32 >> 2) & 0b11) + 4,
2488 ((MASK as u32 >> 4) & 0b11) + 4,
2489 ((MASK as u32 >> 6) & 0b11) + 4,
2490 ],
2491 );
2492 transmute(src:r)
2493 }
2494}
2495
2496/// Shuffles 16-bit integers in the high 64 bits of 128-bit lanes of `a` using
2497/// the control in `imm8`. The low 64 bits of 128-bit lanes of `a` are copied
2498/// to the output.
2499///
2500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shufflehi_epi16)
2501#[inline]
2502#[target_feature(enable = "avx2")]
2503#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 9))]
2504#[rustc_legacy_const_generics(1)]
2505#[stable(feature = "simd_x86", since = "1.27.0")]
2506pub fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2507 static_assert_uimm_bits!(IMM8, 8);
2508 unsafe {
2509 let a = a.as_i16x16();
2510 let r: i16x16 = simd_shuffle!(
2511 a,
2512 a,
2513 [
2514 0,
2515 1,
2516 2,
2517 3,
2518 4 + (IMM8 as u32 & 0b11),
2519 4 + ((IMM8 as u32 >> 2) & 0b11),
2520 4 + ((IMM8 as u32 >> 4) & 0b11),
2521 4 + ((IMM8 as u32 >> 6) & 0b11),
2522 8,
2523 9,
2524 10,
2525 11,
2526 12 + (IMM8 as u32 & 0b11),
2527 12 + ((IMM8 as u32 >> 2) & 0b11),
2528 12 + ((IMM8 as u32 >> 4) & 0b11),
2529 12 + ((IMM8 as u32 >> 6) & 0b11),
2530 ],
2531 );
2532 transmute(r)
2533 }
2534}
2535
2536/// Shuffles 16-bit integers in the low 64 bits of 128-bit lanes of `a` using
2537/// the control in `imm8`. The high 64 bits of 128-bit lanes of `a` are copied
2538/// to the output.
2539///
2540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shufflelo_epi16)
2541#[inline]
2542#[target_feature(enable = "avx2")]
2543#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))]
2544#[rustc_legacy_const_generics(1)]
2545#[stable(feature = "simd_x86", since = "1.27.0")]
2546pub fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2547 static_assert_uimm_bits!(IMM8, 8);
2548 unsafe {
2549 let a = a.as_i16x16();
2550 let r: i16x16 = simd_shuffle!(
2551 a,
2552 a,
2553 [
2554 0 + (IMM8 as u32 & 0b11),
2555 0 + ((IMM8 as u32 >> 2) & 0b11),
2556 0 + ((IMM8 as u32 >> 4) & 0b11),
2557 0 + ((IMM8 as u32 >> 6) & 0b11),
2558 4,
2559 5,
2560 6,
2561 7,
2562 8 + (IMM8 as u32 & 0b11),
2563 8 + ((IMM8 as u32 >> 2) & 0b11),
2564 8 + ((IMM8 as u32 >> 4) & 0b11),
2565 8 + ((IMM8 as u32 >> 6) & 0b11),
2566 12,
2567 13,
2568 14,
2569 15,
2570 ],
2571 );
2572 transmute(r)
2573 }
2574}
2575
2576/// Negates packed 16-bit integers in `a` when the corresponding signed
2577/// 16-bit integer in `b` is negative, and returns the results.
2578/// Results are zeroed out when the corresponding element in `b` is zero.
2579///
2580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi16)
2581#[inline]
2582#[target_feature(enable = "avx2")]
2583#[cfg_attr(test, assert_instr(vpsignw))]
2584#[stable(feature = "simd_x86", since = "1.27.0")]
2585pub fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i {
2586 unsafe { transmute(src:psignw(a.as_i16x16(), b.as_i16x16())) }
2587}
2588
2589/// Negates packed 32-bit integers in `a` when the corresponding signed
2590/// 32-bit integer in `b` is negative, and returns the results.
2591/// Results are zeroed out when the corresponding element in `b` is zero.
2592///
2593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi32)
2594#[inline]
2595#[target_feature(enable = "avx2")]
2596#[cfg_attr(test, assert_instr(vpsignd))]
2597#[stable(feature = "simd_x86", since = "1.27.0")]
2598pub fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i {
2599 unsafe { transmute(src:psignd(a.as_i32x8(), b.as_i32x8())) }
2600}
2601
2602/// Negates packed 8-bit integers in `a` when the corresponding signed
2603/// 8-bit integer in `b` is negative, and returns the results.
2604/// Results are zeroed out when the corresponding element in `b` is zero.
2605///
2606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi8)
2607#[inline]
2608#[target_feature(enable = "avx2")]
2609#[cfg_attr(test, assert_instr(vpsignb))]
2610#[stable(feature = "simd_x86", since = "1.27.0")]
2611pub fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i {
2612 unsafe { transmute(src:psignb(a.as_i8x32(), b.as_i8x32())) }
2613}
2614
2615/// Shifts packed 16-bit integers in `a` left by `count` while
2616/// shifting in zeros, and returns the result
2617///
2618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi16)
2619#[inline]
2620#[target_feature(enable = "avx2")]
2621#[cfg_attr(test, assert_instr(vpsllw))]
2622#[stable(feature = "simd_x86", since = "1.27.0")]
2623pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
2624 unsafe { transmute(src:psllw(a.as_i16x16(), count.as_i16x8())) }
2625}
2626
2627/// Shifts packed 32-bit integers in `a` left by `count` while
2628/// shifting in zeros, and returns the result
2629///
2630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi32)
2631#[inline]
2632#[target_feature(enable = "avx2")]
2633#[cfg_attr(test, assert_instr(vpslld))]
2634#[stable(feature = "simd_x86", since = "1.27.0")]
2635pub fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
2636 unsafe { transmute(src:pslld(a.as_i32x8(), count.as_i32x4())) }
2637}
2638
2639/// Shifts packed 64-bit integers in `a` left by `count` while
2640/// shifting in zeros, and returns the result
2641///
2642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi64)
2643#[inline]
2644#[target_feature(enable = "avx2")]
2645#[cfg_attr(test, assert_instr(vpsllq))]
2646#[stable(feature = "simd_x86", since = "1.27.0")]
2647pub fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
2648 unsafe { transmute(src:psllq(a.as_i64x4(), count.as_i64x2())) }
2649}
2650
2651/// Shifts packed 16-bit integers in `a` left by `IMM8` while
/// shifting in zeros, and returns the results.
2653///
2654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi16)
2655#[inline]
2656#[target_feature(enable = "avx2")]
2657#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 7))]
2658#[rustc_legacy_const_generics(1)]
2659#[stable(feature = "simd_x86", since = "1.27.0")]
2660pub fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2661 static_assert_uimm_bits!(IMM8, 8);
2662 unsafe {
2663 if IMM8 >= 16 {
2664 _mm256_setzero_si256()
2665 } else {
2666 transmute(src:simd_shl(lhs:a.as_u16x16(), rhs:u16x16::splat(IMM8 as u16)))
2667 }
2668 }
2669}
2670
2671/// Shifts packed 32-bit integers in `a` left by `IMM8` while
/// shifting in zeros, and returns the results.
2673///
2674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi32)
2675#[inline]
2676#[target_feature(enable = "avx2")]
2677#[cfg_attr(test, assert_instr(vpslld, IMM8 = 7))]
2678#[rustc_legacy_const_generics(1)]
2679#[stable(feature = "simd_x86", since = "1.27.0")]
2680pub fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2681 unsafe {
2682 static_assert_uimm_bits!(IMM8, 8);
2683 if IMM8 >= 32 {
2684 _mm256_setzero_si256()
2685 } else {
2686 transmute(src:simd_shl(lhs:a.as_u32x8(), rhs:u32x8::splat(IMM8 as u32)))
2687 }
2688 }
2689}
2690
2691/// Shifts packed 64-bit integers in `a` left by `IMM8` while
/// shifting in zeros, and returns the results.
2693///
2694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi64)
2695#[inline]
2696#[target_feature(enable = "avx2")]
2697#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 7))]
2698#[rustc_legacy_const_generics(1)]
2699#[stable(feature = "simd_x86", since = "1.27.0")]
2700pub fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2701 unsafe {
2702 static_assert_uimm_bits!(IMM8, 8);
2703 if IMM8 >= 64 {
2704 _mm256_setzero_si256()
2705 } else {
2706 transmute(src:simd_shl(lhs:a.as_u64x4(), rhs:u64x4::splat(IMM8 as u64)))
2707 }
2708 }
2709}
2710
2711/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
2712///
2713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_si256)
2714#[inline]
2715#[target_feature(enable = "avx2")]
2716#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
2717#[rustc_legacy_const_generics(1)]
2718#[stable(feature = "simd_x86", since = "1.27.0")]
2719pub fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2720 static_assert_uimm_bits!(IMM8, 8);
2721 _mm256_bslli_epi128::<IMM8>(a)
2722}
2723
2724/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
2725///
2726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bslli_epi128)
2727#[inline]
2728#[target_feature(enable = "avx2")]
2729#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
2730#[rustc_legacy_const_generics(1)]
2731#[stable(feature = "simd_x86", since = "1.27.0")]
2732pub fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2733 static_assert_uimm_bits!(IMM8, 8);
2734 const fn mask(shift: i32, i: u32) -> u32 {
2735 let shift = shift as u32 & 0xff;
2736 if shift > 15 || i % 16 < shift {
2737 0
2738 } else {
2739 32 + (i - shift)
2740 }
2741 }
2742 unsafe {
2743 let a = a.as_i8x32();
2744 let r: i8x32 = simd_shuffle!(
2745 i8x32::ZERO,
2746 a,
2747 [
2748 mask(IMM8, 0),
2749 mask(IMM8, 1),
2750 mask(IMM8, 2),
2751 mask(IMM8, 3),
2752 mask(IMM8, 4),
2753 mask(IMM8, 5),
2754 mask(IMM8, 6),
2755 mask(IMM8, 7),
2756 mask(IMM8, 8),
2757 mask(IMM8, 9),
2758 mask(IMM8, 10),
2759 mask(IMM8, 11),
2760 mask(IMM8, 12),
2761 mask(IMM8, 13),
2762 mask(IMM8, 14),
2763 mask(IMM8, 15),
2764 mask(IMM8, 16),
2765 mask(IMM8, 17),
2766 mask(IMM8, 18),
2767 mask(IMM8, 19),
2768 mask(IMM8, 20),
2769 mask(IMM8, 21),
2770 mask(IMM8, 22),
2771 mask(IMM8, 23),
2772 mask(IMM8, 24),
2773 mask(IMM8, 25),
2774 mask(IMM8, 26),
2775 mask(IMM8, 27),
2776 mask(IMM8, 28),
2777 mask(IMM8, 29),
2778 mask(IMM8, 30),
2779 mask(IMM8, 31),
2780 ],
2781 );
2782 transmute(r)
2783 }
2784}
2785
2786/// Shifts packed 32-bit integers in `a` left by the amount
2787/// specified by the corresponding element in `count` while
2788/// shifting in zeros, and returns the result.
2789///
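/// A short illustrative example (arbitrary values; note how an oversized count zeroes its element):
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm_setr_epi32(1, 1, 1, 1);
/// // Shift counts of 32 or more zero the corresponding element.
/// let count = _mm_setr_epi32(0, 1, 2, 33);
/// let r = _mm_sllv_epi32(a, count);
/// let expected = _mm_setr_epi32(1, 2, 4, 0);
/// assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi8(r, expected)), 0xFFFF);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///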
2790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi32)
2791#[inline]
2792#[target_feature(enable = "avx2")]
2793#[cfg_attr(test, assert_instr(vpsllvd))]
2794#[stable(feature = "simd_x86", since = "1.27.0")]
2795pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
2796 unsafe {
2797 let count: u32x4 = count.as_u32x4();
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        let count: u32x4 = simd_select(no_overflow, count, u32x4::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
2801 }
2802}
2803
2804/// Shifts packed 32-bit integers in `a` left by the amount
2805/// specified by the corresponding element in `count` while
2806/// shifting in zeros, and returns the result.
2807///
2808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi32)
2809#[inline]
2810#[target_feature(enable = "avx2")]
2811#[cfg_attr(test, assert_instr(vpsllvd))]
2812#[stable(feature = "simd_x86", since = "1.27.0")]
2813pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
2814 unsafe {
2815 let count: u32x8 = count.as_u32x8();
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        let count: u32x8 = simd_select(no_overflow, count, u32x8::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
2819 }
2820}
2821
2822/// Shifts packed 64-bit integers in `a` left by the amount
2823/// specified by the corresponding element in `count` while
2824/// shifting in zeros, and returns the result.
2825///
2826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi64)
2827#[inline]
2828#[target_feature(enable = "avx2")]
2829#[cfg_attr(test, assert_instr(vpsllvq))]
2830#[stable(feature = "simd_x86", since = "1.27.0")]
2831pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
2832 unsafe {
2833 let count: u64x2 = count.as_u64x2();
        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
        let count: u64x2 = simd_select(no_overflow, count, u64x2::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
2837 }
2838}
2839
2840/// Shifts packed 64-bit integers in `a` left by the amount
2841/// specified by the corresponding element in `count` while
2842/// shifting in zeros, and returns the result.
2843///
2844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi64)
2845#[inline]
2846#[target_feature(enable = "avx2")]
2847#[cfg_attr(test, assert_instr(vpsllvq))]
2848#[stable(feature = "simd_x86", since = "1.27.0")]
2849pub fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
2850 unsafe {
2851 let count: u64x4 = count.as_u64x4();
        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
        let count: u64x4 = simd_select(no_overflow, count, u64x4::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
2855 }
2856}
2857
2858/// Shifts packed 16-bit integers in `a` right by `count` while
2859/// shifting in sign bits.
2860///
2861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi16)
2862#[inline]
2863#[target_feature(enable = "avx2")]
2864#[cfg_attr(test, assert_instr(vpsraw))]
2865#[stable(feature = "simd_x86", since = "1.27.0")]
2866pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psraw(a.as_i16x16(), count.as_i16x8())) }
2868}
2869
2870/// Shifts packed 32-bit integers in `a` right by `count` while
2871/// shifting in sign bits.
2872///
2873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi32)
2874#[inline]
2875#[target_feature(enable = "avx2")]
2876#[cfg_attr(test, assert_instr(vpsrad))]
2877#[stable(feature = "simd_x86", since = "1.27.0")]
2878pub fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrad(a.as_i32x8(), count.as_i32x4())) }
2880}
2881
2882/// Shifts packed 16-bit integers in `a` right by `IMM8` while
2883/// shifting in sign bits.
2884///
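/// A short illustrative example (arbitrary values; AVX2 is checked at runtime):
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(-8);
/// // The arithmetic shift keeps the sign bit, so -8 >> 2 == -2.
/// let r = _mm256_srai_epi16::<2>(a);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, _mm256_set1_epi16(-2))), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///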
2885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi16)
2886#[inline]
2887#[target_feature(enable = "avx2")]
2888#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 7))]
2889#[rustc_legacy_const_generics(1)]
2890#[stable(feature = "simd_x86", since = "1.27.0")]
2891pub fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2892 static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16))) }
2894}
2895
2896/// Shifts packed 32-bit integers in `a` right by `IMM8` while
2897/// shifting in sign bits.
2898///
2899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi32)
2900#[inline]
2901#[target_feature(enable = "avx2")]
2902#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 7))]
2903#[rustc_legacy_const_generics(1)]
2904#[stable(feature = "simd_x86", since = "1.27.0")]
2905pub fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2906 static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31)))) }
2908}
2909
2910/// Shifts packed 32-bit integers in `a` right by the amount specified by the
2911/// corresponding element in `count` while shifting in sign bits.
2912///
2913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi32)
2914#[inline]
2915#[target_feature(enable = "avx2")]
2916#[cfg_attr(test, assert_instr(vpsravd))]
2917#[stable(feature = "simd_x86", since = "1.27.0")]
2918pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
2919 unsafe {
2920 let count: u32x4 = count.as_u32x4();
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        let count: i32x4 = simd_select(no_overflow, transmute(count), i32x4::splat(31));
        simd_shr(a.as_i32x4(), count).as_m128i()
2924 }
2925}
2926
2927/// Shifts packed 32-bit integers in `a` right by the amount specified by the
2928/// corresponding element in `count` while shifting in sign bits.
2929///
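/// A short sketch of the behaviour for oversized shift counts (illustrative values only):
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(-16, -16, 16, 16, -1, -1, 64, 64);
/// // Shift counts of 32 or more behave like a shift by 31, leaving only the sign bit.
/// let count = _mm256_setr_epi32(2, 40, 2, 40, 0, 31, 3, 100);
/// let r = _mm256_srav_epi32(a, count);
/// let expected = _mm256_setr_epi32(-4, -1, 4, 0, -1, -1, 8, 0);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///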
2930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi32)
2931#[inline]
2932#[target_feature(enable = "avx2")]
2933#[cfg_attr(test, assert_instr(vpsravd))]
2934#[stable(feature = "simd_x86", since = "1.27.0")]
2935pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
2936 unsafe {
2937 let count: u32x8 = count.as_u32x8();
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        let count: i32x8 = simd_select(no_overflow, transmute(count), i32x8::splat(31));
        simd_shr(a.as_i32x8(), count).as_m256i()
2941 }
2942}
2943
2944/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
2945///
2946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_si256)
2947#[inline]
2948#[target_feature(enable = "avx2")]
2949#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
2950#[rustc_legacy_const_generics(1)]
2951#[stable(feature = "simd_x86", since = "1.27.0")]
2952pub fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2953 static_assert_uimm_bits!(IMM8, 8);
2954 _mm256_bsrli_epi128::<IMM8>(a)
2955}
2956
2957/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
2958///
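/// A small sketch of the per-lane byte shift (illustrative values only; AVX2 is checked at runtime):
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi8(1);
/// let r = _mm256_bsrli_epi128::<1>(a);
/// // Each 128-bit lane is shifted right by one byte, so byte 15 of each lane becomes 0.
/// let expected = _mm256_setr_epi8(
///     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
///     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
/// );
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///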
2959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bsrli_epi128)
2960#[inline]
2961#[target_feature(enable = "avx2")]
2962#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
2963#[rustc_legacy_const_generics(1)]
2964#[stable(feature = "simd_x86", since = "1.27.0")]
2965pub fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2966 static_assert_uimm_bits!(IMM8, 8);
2967 const fn mask(shift: i32, i: u32) -> u32 {
2968 let shift = shift as u32 & 0xff;
2969 if shift > 15 || (15 - (i % 16)) < shift {
2970 0
2971 } else {
2972 32 + (i + shift)
2973 }
2974 }
2975 unsafe {
2976 let a = a.as_i8x32();
2977 let r: i8x32 = simd_shuffle!(
2978 i8x32::ZERO,
2979 a,
2980 [
2981 mask(IMM8, 0),
2982 mask(IMM8, 1),
2983 mask(IMM8, 2),
2984 mask(IMM8, 3),
2985 mask(IMM8, 4),
2986 mask(IMM8, 5),
2987 mask(IMM8, 6),
2988 mask(IMM8, 7),
2989 mask(IMM8, 8),
2990 mask(IMM8, 9),
2991 mask(IMM8, 10),
2992 mask(IMM8, 11),
2993 mask(IMM8, 12),
2994 mask(IMM8, 13),
2995 mask(IMM8, 14),
2996 mask(IMM8, 15),
2997 mask(IMM8, 16),
2998 mask(IMM8, 17),
2999 mask(IMM8, 18),
3000 mask(IMM8, 19),
3001 mask(IMM8, 20),
3002 mask(IMM8, 21),
3003 mask(IMM8, 22),
3004 mask(IMM8, 23),
3005 mask(IMM8, 24),
3006 mask(IMM8, 25),
3007 mask(IMM8, 26),
3008 mask(IMM8, 27),
3009 mask(IMM8, 28),
3010 mask(IMM8, 29),
3011 mask(IMM8, 30),
3012 mask(IMM8, 31),
3013 ],
3014 );
3015 transmute(r)
3016 }
3017}
3018
3019/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
3020/// zeros.
3021///
3022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi16)
3023#[inline]
3024#[target_feature(enable = "avx2")]
3025#[cfg_attr(test, assert_instr(vpsrlw))]
3026#[stable(feature = "simd_x86", since = "1.27.0")]
3027pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrlw(a.as_i16x16(), count.as_i16x8())) }
3029}
3030
3031/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
3032/// zeros.
3033///
3034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi32)
3035#[inline]
3036#[target_feature(enable = "avx2")]
3037#[cfg_attr(test, assert_instr(vpsrld))]
3038#[stable(feature = "simd_x86", since = "1.27.0")]
3039pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrld(a.as_i32x8(), count.as_i32x4())) }
3041}
3042
3043/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
3044/// zeros.
3045///
3046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi64)
3047#[inline]
3048#[target_feature(enable = "avx2")]
3049#[cfg_attr(test, assert_instr(vpsrlq))]
3050#[stable(feature = "simd_x86", since = "1.27.0")]
3051pub fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrlq(a.as_i64x4(), count.as_i64x2())) }
3053}
3054
3055/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
3057///
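/// A short illustrative example (arbitrary values; AVX2 is checked at runtime):
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(-1); // every bit set in each 16-bit element
/// let r = _mm256_srli_epi16::<4>(a);
/// // The logical shift brings in zeros: 0xFFFF >> 4 == 0x0FFF.
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, _mm256_set1_epi16(0x0FFF))), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///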
3058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi16)
3059#[inline]
3060#[target_feature(enable = "avx2")]
3061#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 7))]
3062#[rustc_legacy_const_generics(1)]
3063#[stable(feature = "simd_x86", since = "1.27.0")]
3064pub fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
3065 static_assert_uimm_bits!(IMM8, 8);
3066 unsafe {
3067 if IMM8 >= 16 {
3068 _mm256_setzero_si256()
3069 } else {
            transmute(simd_shr(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
3071 }
3072 }
3073}
3074
3075/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
3077///
3078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi32)
3079#[inline]
3080#[target_feature(enable = "avx2")]
3081#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 7))]
3082#[rustc_legacy_const_generics(1)]
3083#[stable(feature = "simd_x86", since = "1.27.0")]
3084pub fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
3085 static_assert_uimm_bits!(IMM8, 8);
3086 unsafe {
3087 if IMM8 >= 32 {
3088 _mm256_setzero_si256()
3089 } else {
            transmute(simd_shr(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
3091 }
3092 }
3093}
3094
3095/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
3097///
3098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi64)
3099#[inline]
3100#[target_feature(enable = "avx2")]
3101#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 7))]
3102#[rustc_legacy_const_generics(1)]
3103#[stable(feature = "simd_x86", since = "1.27.0")]
3104pub fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
3105 static_assert_uimm_bits!(IMM8, 8);
3106 unsafe {
3107 if IMM8 >= 64 {
3108 _mm256_setzero_si256()
3109 } else {
            transmute(simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
3111 }
3112 }
3113}
3114
3115/// Shifts packed 32-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros.
3117///
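/// A short illustrative example (arbitrary values; note how an oversized count zeroes its element):
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm_setr_epi32(8, 8, 8, 8);
/// // Shift counts of 32 or more zero the corresponding element.
/// let count = _mm_setr_epi32(0, 1, 3, 34);
/// let r = _mm_srlv_epi32(a, count);
/// let expected = _mm_setr_epi32(8, 4, 1, 0);
/// assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi8(r, expected)), 0xFFFF);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///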
3118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi32)
3119#[inline]
3120#[target_feature(enable = "avx2")]
3121#[cfg_attr(test, assert_instr(vpsrlvd))]
3122#[stable(feature = "simd_x86", since = "1.27.0")]
3123pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
3124 unsafe {
3125 let count: u32x4 = count.as_u32x4();
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        let count: u32x4 = simd_select(no_overflow, count, u32x4::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
3129 }
3130}
3131
3132/// Shifts packed 32-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros.
3134///
3135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi32)
3136#[inline]
3137#[target_feature(enable = "avx2")]
3138#[cfg_attr(test, assert_instr(vpsrlvd))]
3139#[stable(feature = "simd_x86", since = "1.27.0")]
3140pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
3141 unsafe {
3142 let count: u32x8 = count.as_u32x8();
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        let count: u32x8 = simd_select(no_overflow, count, u32x8::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
3146 }
3147}
3148
3149/// Shifts packed 64-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros.
3151///
3152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi64)
3153#[inline]
3154#[target_feature(enable = "avx2")]
3155#[cfg_attr(test, assert_instr(vpsrlvq))]
3156#[stable(feature = "simd_x86", since = "1.27.0")]
3157pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
3158 unsafe {
3159 let count: u64x2 = count.as_u64x2();
        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
        let count: u64x2 = simd_select(no_overflow, count, u64x2::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
3163 }
3164}
3165
3166/// Shifts packed 64-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros.
3168///
3169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi64)
3170#[inline]
3171#[target_feature(enable = "avx2")]
3172#[cfg_attr(test, assert_instr(vpsrlvq))]
3173#[stable(feature = "simd_x86", since = "1.27.0")]
3174pub fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
3175 unsafe {
3176 let count: u64x4 = count.as_u64x4();
        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
        let count: u64x4 = simd_select(no_overflow, count, u64x4::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
3180 }
3181}
3182
/// Loads 256 bits of integer data from memory into `dst` using a non-temporal memory hint.
/// `mem_addr` must be aligned on a 32-byte boundary or a general-protection exception may be
/// generated. To minimize caching, the data is flagged as non-temporal (unlikely to be used
/// again soon).
3186///
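/// A minimal usage sketch; the `Aligned` wrapper below is only an illustrative way to obtain the
/// required 32-byte alignment:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // Illustrative helper to guarantee 32-byte alignment of the source data.
/// #[repr(align(32))]
/// struct Aligned([i32; 8]);
///
/// let data = Aligned([1, 2, 3, 4, 5, 6, 7, 8]);
/// let r = unsafe { _mm256_stream_load_si256(data.0.as_ptr() as *const __m256i) };
/// let expected = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///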
3187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_load_si256)
3188#[inline]
3189#[target_feature(enable = "avx2")]
3190#[cfg_attr(test, assert_instr(vmovntdqa))]
3191#[stable(feature = "simd_x86_updates", since = "1.82.0")]
3192pub unsafe fn _mm256_stream_load_si256(mem_addr: *const __m256i) -> __m256i {
3193 let dst: __m256i;
3194 crate::arch::asm!(
3195 vpl!("vmovntdqa {a}"),
3196 a = out(ymm_reg) dst,
3197 p = in(reg) mem_addr,
3198 options(pure, readonly, nostack, preserves_flags),
3199 );
3200 dst
3201}
3202
/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.
3204///
3205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi16)
3206#[inline]
3207#[target_feature(enable = "avx2")]
3208#[cfg_attr(test, assert_instr(vpsubw))]
3209#[stable(feature = "simd_x86", since = "1.27.0")]
3210pub fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i16x16(), b.as_i16x16())) }
3212}
3213
/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`.
3215///
3216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi32)
3217#[inline]
3218#[target_feature(enable = "avx2")]
3219#[cfg_attr(test, assert_instr(vpsubd))]
3220#[stable(feature = "simd_x86", since = "1.27.0")]
3221pub fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i32x8(), b.as_i32x8())) }
3223}
3224
/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`.
3226///
3227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi64)
3228#[inline]
3229#[target_feature(enable = "avx2")]
3230#[cfg_attr(test, assert_instr(vpsubq))]
3231#[stable(feature = "simd_x86", since = "1.27.0")]
3232pub fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i64x4(), b.as_i64x4())) }
3234}
3235
/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.
3237///
3238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi8)
3239#[inline]
3240#[target_feature(enable = "avx2")]
3241#[cfg_attr(test, assert_instr(vpsubb))]
3242#[stable(feature = "simd_x86", since = "1.27.0")]
3243pub fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i8x32(), b.as_i8x32())) }
3245}
3246
/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in
3248/// `a` using saturation.
3249///
3250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epi16)
3251#[inline]
3252#[target_feature(enable = "avx2")]
3253#[cfg_attr(test, assert_instr(vpsubsw))]
3254#[stable(feature = "simd_x86", since = "1.27.0")]
3255pub fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16())) }
3257}
3258
/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in
3260/// `a` using saturation.
3261///
3262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epi8)
3263#[inline]
3264#[target_feature(enable = "avx2")]
3265#[cfg_attr(test, assert_instr(vpsubsb))]
3266#[stable(feature = "simd_x86", since = "1.27.0")]
3267pub fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32())) }
3269}
3270
/// Subtracts packed unsigned 16-bit integers in `b` from packed unsigned
/// 16-bit integers in `a` using saturation.
3273///
3274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epu16)
3275#[inline]
3276#[target_feature(enable = "avx2")]
3277#[cfg_attr(test, assert_instr(vpsubusw))]
3278#[stable(feature = "simd_x86", since = "1.27.0")]
3279pub fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16())) }
3281}
3282
/// Subtracts packed unsigned 8-bit integers in `b` from packed unsigned
/// 8-bit integers in `a` using saturation.
3285///
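/// A short illustrative example of the unsigned saturation (arbitrary values):
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi8(10);
/// let b = _mm256_set1_epi8(100);
/// // The unsigned subtraction 10 - 100 saturates to 0 in every lane.
/// let r = _mm256_subs_epu8(a, b);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, _mm256_set1_epi8(0))), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///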
3286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epu8)
3287#[inline]
3288#[target_feature(enable = "avx2")]
3289#[cfg_attr(test, assert_instr(vpsubusb))]
3290#[stable(feature = "simd_x86", since = "1.27.0")]
3291pub fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32())) }
3293}
3294
/// Unpacks and interleaves 8-bit integers from the high half of each
3296/// 128-bit lane in `a` and `b`.
3297///
3298/// ```rust
3299/// #[cfg(target_arch = "x86")]
3300/// use std::arch::x86::*;
3301/// #[cfg(target_arch = "x86_64")]
3302/// use std::arch::x86_64::*;
3303///
3304/// # fn main() {
3305/// # if is_x86_feature_detected!("avx2") {
3306/// # #[target_feature(enable = "avx2")]
3307/// # unsafe fn worker() {
3308/// let a = _mm256_setr_epi8(
3309/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
3310/// 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3311/// );
3312/// let b = _mm256_setr_epi8(
3313/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
3314/// -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
3315/// -30, -31,
3316/// );
3317///
3318/// let c = _mm256_unpackhi_epi8(a, b);
3319///
3320/// let expected = _mm256_setr_epi8(
3321/// 8, -8, 9, -9, 10, -10, 11, -11, 12, -12, 13, -13, 14, -14, 15, -15,
3322/// 24, -24, 25, -25, 26, -26, 27, -27, 28, -28, 29, -29, 30, -30, 31,
3323/// -31,
3324/// );
3325/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3326///
3327/// # }
3328/// # unsafe { worker(); }
3329/// # }
3330/// # }
3331/// ```
3332///
3333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi8)
3334#[inline]
3335#[target_feature(enable = "avx2")]
3336#[cfg_attr(test, assert_instr(vpunpckhbw))]
3337#[stable(feature = "simd_x86", since = "1.27.0")]
3338pub fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
3339 unsafe {
3340 #[rustfmt::skip]
3341 let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3342 8, 40, 9, 41, 10, 42, 11, 43,
3343 12, 44, 13, 45, 14, 46, 15, 47,
3344 24, 56, 25, 57, 26, 58, 27, 59,
3345 28, 60, 29, 61, 30, 62, 31, 63,
3346 ]);
        transmute(r)
3348 }
3349}
3350
/// Unpacks and interleaves 8-bit integers from the low half of each
3352/// 128-bit lane of `a` and `b`.
3353///
3354/// ```rust
3355/// #[cfg(target_arch = "x86")]
3356/// use std::arch::x86::*;
3357/// #[cfg(target_arch = "x86_64")]
3358/// use std::arch::x86_64::*;
3359///
3360/// # fn main() {
3361/// # if is_x86_feature_detected!("avx2") {
3362/// # #[target_feature(enable = "avx2")]
3363/// # unsafe fn worker() {
3364/// let a = _mm256_setr_epi8(
3365/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
3366/// 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3367/// );
3368/// let b = _mm256_setr_epi8(
3369/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
3370/// -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
3371/// -30, -31,
3372/// );
3373///
3374/// let c = _mm256_unpacklo_epi8(a, b);
3375///
3376/// let expected = _mm256_setr_epi8(
3377/// 0, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 16, -16, 17,
3378/// -17, 18, -18, 19, -19, 20, -20, 21, -21, 22, -22, 23, -23,
3379/// );
3380/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3381///
3382/// # }
3383/// # unsafe { worker(); }
3384/// # }
3385/// # }
3386/// ```
3387///
3388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi8)
3389#[inline]
3390#[target_feature(enable = "avx2")]
3391#[cfg_attr(test, assert_instr(vpunpcklbw))]
3392#[stable(feature = "simd_x86", since = "1.27.0")]
3393pub fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
3394 unsafe {
3395 #[rustfmt::skip]
3396 let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3397 0, 32, 1, 33, 2, 34, 3, 35,
3398 4, 36, 5, 37, 6, 38, 7, 39,
3399 16, 48, 17, 49, 18, 50, 19, 51,
3400 20, 52, 21, 53, 22, 54, 23, 55,
3401 ]);
        transmute(r)
3403 }
3404}
3405
/// Unpacks and interleaves 16-bit integers from the high half of each
3407/// 128-bit lane of `a` and `b`.
3408///
3409/// ```rust
3410/// #[cfg(target_arch = "x86")]
3411/// use std::arch::x86::*;
3412/// #[cfg(target_arch = "x86_64")]
3413/// use std::arch::x86_64::*;
3414///
3415/// # fn main() {
3416/// # if is_x86_feature_detected!("avx2") {
3417/// # #[target_feature(enable = "avx2")]
3418/// # unsafe fn worker() {
3419/// let a = _mm256_setr_epi16(
3420/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3421/// );
3422/// let b = _mm256_setr_epi16(
3423/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
3424/// );
3425///
3426/// let c = _mm256_unpackhi_epi16(a, b);
3427///
3428/// let expected = _mm256_setr_epi16(
3429/// 4, -4, 5, -5, 6, -6, 7, -7, 12, -12, 13, -13, 14, -14, 15, -15,
3430/// );
3431/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3432///
3433/// # }
3434/// # unsafe { worker(); }
3435/// # }
3436/// # }
3437/// ```
3438///
3439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi16)
3440#[inline]
3441#[target_feature(enable = "avx2")]
3442#[cfg_attr(test, assert_instr(vpunpckhwd))]
3443#[stable(feature = "simd_x86", since = "1.27.0")]
3444pub fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
3445 unsafe {
3446 let r: i16x16 = simd_shuffle!(
3447 a.as_i16x16(),
3448 b.as_i16x16(),
3449 [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
3450 );
        transmute(r)
3452 }
3453}
3454
/// Unpacks and interleaves 16-bit integers from the low half of each
3456/// 128-bit lane of `a` and `b`.
3457///
3458/// ```rust
3459/// #[cfg(target_arch = "x86")]
3460/// use std::arch::x86::*;
3461/// #[cfg(target_arch = "x86_64")]
3462/// use std::arch::x86_64::*;
3463///
3464/// # fn main() {
3465/// # if is_x86_feature_detected!("avx2") {
3466/// # #[target_feature(enable = "avx2")]
3467/// # unsafe fn worker() {
3468///
3469/// let a = _mm256_setr_epi16(
3470/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3471/// );
3472/// let b = _mm256_setr_epi16(
3473/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
3474/// );
3475///
3476/// let c = _mm256_unpacklo_epi16(a, b);
3477///
3478/// let expected = _mm256_setr_epi16(
3479/// 0, 0, 1, -1, 2, -2, 3, -3, 8, -8, 9, -9, 10, -10, 11, -11,
3480/// );
3481/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3482///
3483/// # }
3484/// # unsafe { worker(); }
3485/// # }
3486/// # }
3487/// ```
3488///
3489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi16)
3490#[inline]
3491#[target_feature(enable = "avx2")]
3492#[cfg_attr(test, assert_instr(vpunpcklwd))]
3493#[stable(feature = "simd_x86", since = "1.27.0")]
3494pub fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
3495 unsafe {
3496 let r: i16x16 = simd_shuffle!(
3497 a.as_i16x16(),
3498 b.as_i16x16(),
3499 [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
3500 );
        transmute(r)
3502 }
3503}
3504
/// Unpacks and interleaves 32-bit integers from the high half of each
3506/// 128-bit lane of `a` and `b`.
3507///
3508/// ```rust
3509/// #[cfg(target_arch = "x86")]
3510/// use std::arch::x86::*;
3511/// #[cfg(target_arch = "x86_64")]
3512/// use std::arch::x86_64::*;
3513///
3514/// # fn main() {
3515/// # if is_x86_feature_detected!("avx2") {
3516/// # #[target_feature(enable = "avx2")]
3517/// # unsafe fn worker() {
3518/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
3519/// let b = _mm256_setr_epi32(0, -1, -2, -3, -4, -5, -6, -7);
3520///
3521/// let c = _mm256_unpackhi_epi32(a, b);
3522///
3523/// let expected = _mm256_setr_epi32(2, -2, 3, -3, 6, -6, 7, -7);
3524/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3525///
3526/// # }
3527/// # unsafe { worker(); }
3528/// # }
3529/// # }
3530/// ```
3531///
3532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi32)
3533#[inline]
3534#[target_feature(enable = "avx2")]
3535#[cfg_attr(test, assert_instr(vunpckhps))]
3536#[stable(feature = "simd_x86", since = "1.27.0")]
3537pub fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
3538 unsafe {
3539 let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
        transmute(r)
3541 }
3542}
3543
/// Unpacks and interleaves 32-bit integers from the low half of each
3545/// 128-bit lane of `a` and `b`.
3546///
3547/// ```rust
3548/// #[cfg(target_arch = "x86")]
3549/// use std::arch::x86::*;
3550/// #[cfg(target_arch = "x86_64")]
3551/// use std::arch::x86_64::*;
3552///
3553/// # fn main() {
3554/// # if is_x86_feature_detected!("avx2") {
3555/// # #[target_feature(enable = "avx2")]
3556/// # unsafe fn worker() {
3557/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
3558/// let b = _mm256_setr_epi32(0, -1, -2, -3, -4, -5, -6, -7);
3559///
3560/// let c = _mm256_unpacklo_epi32(a, b);
3561///
3562/// let expected = _mm256_setr_epi32(0, 0, 1, -1, 4, -4, 5, -5);
3563/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3564///
3565/// # }
3566/// # unsafe { worker(); }
3567/// # }
3568/// # }
3569/// ```
3570///
3571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi32)
3572#[inline]
3573#[target_feature(enable = "avx2")]
3574#[cfg_attr(test, assert_instr(vunpcklps))]
3575#[stable(feature = "simd_x86", since = "1.27.0")]
3576pub fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
3577 unsafe {
3578 let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
        transmute(r)
3580 }
3581}
3582
/// Unpacks and interleaves 64-bit integers from the high half of each
3584/// 128-bit lane of `a` and `b`.
3585///
3586/// ```rust
3587/// #[cfg(target_arch = "x86")]
3588/// use std::arch::x86::*;
3589/// #[cfg(target_arch = "x86_64")]
3590/// use std::arch::x86_64::*;
3591///
3592/// # fn main() {
3593/// # if is_x86_feature_detected!("avx2") {
3594/// # #[target_feature(enable = "avx2")]
3595/// # unsafe fn worker() {
3596/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
3597/// let b = _mm256_setr_epi64x(0, -1, -2, -3);
3598///
3599/// let c = _mm256_unpackhi_epi64(a, b);
3600///
3601/// let expected = _mm256_setr_epi64x(1, -1, 3, -3);
3602/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3603///
3604/// # }
3605/// # unsafe { worker(); }
3606/// # }
3607/// # }
3608/// ```
3609///
3610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi64)
3611#[inline]
3612#[target_feature(enable = "avx2")]
3613#[cfg_attr(test, assert_instr(vunpckhpd))]
3614#[stable(feature = "simd_x86", since = "1.27.0")]
3615pub fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
3616 unsafe {
3617 let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
        transmute(r)
3619 }
3620}
3621
/// Unpacks and interleaves 64-bit integers from the low half of each
3623/// 128-bit lane of `a` and `b`.
3624///
3625/// ```rust
3626/// #[cfg(target_arch = "x86")]
3627/// use std::arch::x86::*;
3628/// #[cfg(target_arch = "x86_64")]
3629/// use std::arch::x86_64::*;
3630///
3631/// # fn main() {
3632/// # if is_x86_feature_detected!("avx2") {
3633/// # #[target_feature(enable = "avx2")]
3634/// # unsafe fn worker() {
3635/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
3636/// let b = _mm256_setr_epi64x(0, -1, -2, -3);
3637///
3638/// let c = _mm256_unpacklo_epi64(a, b);
3639///
3640/// let expected = _mm256_setr_epi64x(0, 0, 2, -2);
3641/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3642///
3643/// # }
3644/// # unsafe { worker(); }
3645/// # }
3646/// # }
3647/// ```
3648///
3649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi64)
3650#[inline]
3651#[target_feature(enable = "avx2")]
3652#[cfg_attr(test, assert_instr(vunpcklpd))]
3653#[stable(feature = "simd_x86", since = "1.27.0")]
3654pub fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
3655 unsafe {
3656 let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
        transmute(r)
3658 }
3659}
3660
3661/// Computes the bitwise XOR of 256 bits (representing integer data)
/// in `a` and `b`.
3663///
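/// A short illustrative example (arbitrary bit patterns; AVX2 is checked at runtime):
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi8(0b0101);
/// let b = _mm256_set1_epi8(0b0011);
/// let r = _mm256_xor_si256(a, b);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, _mm256_set1_epi8(0b0110))), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///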
3664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_si256)
3665#[inline]
3666#[target_feature(enable = "avx2")]
3667#[cfg_attr(test, assert_instr(vxorps))]
3668#[stable(feature = "simd_x86", since = "1.27.0")]
3669pub fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
3671}
3672
3673/// Extracts an 8-bit integer from `a`, selected with `INDEX`. Returns a 32-bit
3674/// integer containing the zero-extended integer data.
3675///
3676/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
3677///
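/// A short illustrative example of the zero-extension (arbitrary values):
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi8(
///     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -1,
///     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
/// );
/// // The selected byte is zero-extended, so -1 (0xFF) comes back as 255.
/// assert_eq!(_mm256_extract_epi8::<15>(a), 255);
/// assert_eq!(_mm256_extract_epi8::<31>(a), 31);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///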
3678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi8)
3679#[inline]
3680#[target_feature(enable = "avx2")]
3681// This intrinsic has no corresponding instruction.
3682#[rustc_legacy_const_generics(1)]
3683#[stable(feature = "simd_x86", since = "1.27.0")]
3684pub fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
3685 static_assert_uimm_bits!(INDEX, 5);
3686 unsafe { simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32 }
3687}
3688
3689/// Extracts a 16-bit integer from `a`, selected with `INDEX`. Returns a 32-bit
3690/// integer containing the zero-extended integer data.
3691///
3692/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
3693///
3694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi16)
3695#[inline]
3696#[target_feature(enable = "avx2")]
3697// This intrinsic has no corresponding instruction.
3698#[rustc_legacy_const_generics(1)]
3699#[stable(feature = "simd_x86", since = "1.27.0")]
3700pub fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
3701 static_assert_uimm_bits!(INDEX, 4);
3702 unsafe { simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 }
3703}
3704
3705#[allow(improper_ctypes)]
3706unsafe extern "C" {
    #[link_name = "llvm.x86.avx2.phadd.sw"]
    unsafe fn phaddsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.phsub.sw"]
    unsafe fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.pmadd.wd"]
    unsafe fn pmaddwd(a: i16x16, b: i16x16) -> i32x8;
    #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
    unsafe fn pmaddubsw(a: u8x32, b: i8x32) -> i16x16;
    #[link_name = "llvm.x86.avx2.mpsadbw"]
    unsafe fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
    #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
    unsafe fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.packsswb"]
    unsafe fn packsswb(a: i16x16, b: i16x16) -> i8x32;
    #[link_name = "llvm.x86.avx2.packssdw"]
    unsafe fn packssdw(a: i32x8, b: i32x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.packuswb"]
    unsafe fn packuswb(a: i16x16, b: i16x16) -> u8x32;
    #[link_name = "llvm.x86.avx2.packusdw"]
    unsafe fn packusdw(a: i32x8, b: i32x8) -> u16x16;
    #[link_name = "llvm.x86.avx2.psad.bw"]
    unsafe fn psadbw(a: u8x32, b: u8x32) -> u64x4;
    #[link_name = "llvm.x86.avx2.psign.b"]
    unsafe fn psignb(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx2.psign.w"]
    unsafe fn psignw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.psign.d"]
    unsafe fn psignd(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.psll.w"]
    unsafe fn psllw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psll.d"]
    unsafe fn pslld(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psll.q"]
    unsafe fn psllq(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx2.psra.w"]
    unsafe fn psraw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psra.d"]
    unsafe fn psrad(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrl.w"]
    unsafe fn psrlw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psrl.d"]
    unsafe fn psrld(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrl.q"]
    unsafe fn psrlq(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx2.pshuf.b"]
    unsafe fn pshufb(a: u8x32, b: u8x32) -> u8x32;
    #[link_name = "llvm.x86.avx2.permd"]
    unsafe fn permd(a: u32x8, b: u32x8) -> u32x8;
    #[link_name = "llvm.x86.avx2.permps"]
    unsafe fn permps(a: __m256, b: i32x8) -> __m256;
    #[link_name = "llvm.x86.avx2.gather.d.d"]
    unsafe fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.d.d.256"]
    unsafe fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8;
    #[link_name = "llvm.x86.avx2.gather.d.q"]
    unsafe fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2;
    #[link_name = "llvm.x86.avx2.gather.d.q.256"]
    unsafe fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.q.d"]
    unsafe fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.q.d.256"]
    unsafe fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.q.q"]
    unsafe fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2;
    #[link_name = "llvm.x86.avx2.gather.q.q.256"]
    unsafe fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.d.pd"]
    unsafe fn pgatherdpd(
        src: __m128d,
        slice: *const i8,
        offsets: i32x4,
        mask: __m128d,
        scale: i8,
    ) -> __m128d;
    #[link_name = "llvm.x86.avx2.gather.d.pd.256"]
    unsafe fn vpgatherdpd(
        src: __m256d,
        slice: *const i8,
        offsets: i32x4,
        mask: __m256d,
        scale: i8,
    ) -> __m256d;
    #[link_name = "llvm.x86.avx2.gather.q.pd"]
    unsafe fn pgatherqpd(
        src: __m128d,
        slice: *const i8,
        offsets: i64x2,
        mask: __m128d,
        scale: i8,
    ) -> __m128d;
    #[link_name = "llvm.x86.avx2.gather.q.pd.256"]
    unsafe fn vpgatherqpd(
        src: __m256d,
        slice: *const i8,
        offsets: i64x4,
        mask: __m256d,
        scale: i8,
    ) -> __m256d;
    #[link_name = "llvm.x86.avx2.gather.d.ps"]
    unsafe fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8)
    -> __m128;
    #[link_name = "llvm.x86.avx2.gather.d.ps.256"]
    unsafe fn vpgatherdps(
        src: __m256,
        slice: *const i8,
        offsets: i32x8,
        mask: __m256,
        scale: i8,
    ) -> __m256;
    #[link_name = "llvm.x86.avx2.gather.q.ps"]
    unsafe fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8)
    -> __m128;
    #[link_name = "llvm.x86.avx2.gather.q.ps.256"]
    unsafe fn vpgatherqps(
        src: __m128,
        slice: *const i8,
        offsets: i64x4,
        mask: __m128,
        scale: i8,
    ) -> __m128;
3827}
3828
3829#[cfg(test)]
3830mod tests {
3831
3832 use stdarch_test::simd_test;
3833
3834 use crate::core_arch::x86::*;
3835
3836 #[simd_test(enable = "avx2")]
3837 unsafe fn test_mm256_abs_epi32() {
3838 #[rustfmt::skip]
3839 let a = _mm256_setr_epi32(
3840 0, 1, -1, i32::MAX,
3841 i32::MIN, 100, -100, -32,
3842 );
3843 let r = _mm256_abs_epi32(a);
3844 #[rustfmt::skip]
3845 let e = _mm256_setr_epi32(
3846 0, 1, 1, i32::MAX,
3847 i32::MAX.wrapping_add(1), 100, 100, 32,
3848 );
3849 assert_eq_m256i(r, e);
3850 }
3851
3852 #[simd_test(enable = "avx2")]
3853 unsafe fn test_mm256_abs_epi16() {
3854 #[rustfmt::skip]
3855 let a = _mm256_setr_epi16(
3856 0, 1, -1, 2, -2, 3, -3, 4,
3857 -4, 5, -5, i16::MAX, i16::MIN, 100, -100, -32,
3858 );
3859 let r = _mm256_abs_epi16(a);
3860 #[rustfmt::skip]
3861 let e = _mm256_setr_epi16(
3862 0, 1, 1, 2, 2, 3, 3, 4,
3863 4, 5, 5, i16::MAX, i16::MAX.wrapping_add(1), 100, 100, 32,
3864 );
3865 assert_eq_m256i(r, e);
3866 }
3867
3868 #[simd_test(enable = "avx2")]
3869 unsafe fn test_mm256_abs_epi8() {
3870 #[rustfmt::skip]
3871 let a = _mm256_setr_epi8(
3872 0, 1, -1, 2, -2, 3, -3, 4,
3873 -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
3874 0, 1, -1, 2, -2, 3, -3, 4,
3875 -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
3876 );
3877 let r = _mm256_abs_epi8(a);
3878 #[rustfmt::skip]
3879 let e = _mm256_setr_epi8(
3880 0, 1, 1, 2, 2, 3, 3, 4,
3881 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
3882 0, 1, 1, 2, 2, 3, 3, 4,
3883 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
3884 );
3885 assert_eq_m256i(r, e);
3886 }
3887
3888 #[simd_test(enable = "avx2")]
3889 unsafe fn test_mm256_add_epi64() {
3890 let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000);
3891 let b = _mm256_setr_epi64x(-1, 0, 1, 2);
3892 let r = _mm256_add_epi64(a, b);
3893 let e = _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002);
3894 assert_eq_m256i(r, e);
3895 }
3896
3897 #[simd_test(enable = "avx2")]
3898 unsafe fn test_mm256_add_epi32() {
3899 let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6);
3900 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3901 let r = _mm256_add_epi32(a, b);
3902 let e = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
3903 assert_eq_m256i(r, e);
3904 }
3905
3906 #[simd_test(enable = "avx2")]
3907 unsafe fn test_mm256_add_epi16() {
3908 #[rustfmt::skip]
3909 let a = _mm256_setr_epi16(
3910 0, 1, 2, 3, 4, 5, 6, 7,
3911 8, 9, 10, 11, 12, 13, 14, 15,
3912 );
3913 #[rustfmt::skip]
3914 let b = _mm256_setr_epi16(
3915 0, 1, 2, 3, 4, 5, 6, 7,
3916 8, 9, 10, 11, 12, 13, 14, 15,
3917 );
3918 let r = _mm256_add_epi16(a, b);
3919 #[rustfmt::skip]
3920 let e = _mm256_setr_epi16(
3921 0, 2, 4, 6, 8, 10, 12, 14,
3922 16, 18, 20, 22, 24, 26, 28, 30,
3923 );
3924 assert_eq_m256i(r, e);
3925 }
3926
3927 #[simd_test(enable = "avx2")]
3928 unsafe fn test_mm256_add_epi8() {
3929 #[rustfmt::skip]
3930 let a = _mm256_setr_epi8(
3931 0, 1, 2, 3, 4, 5, 6, 7,
3932 8, 9, 10, 11, 12, 13, 14, 15,
3933 16, 17, 18, 19, 20, 21, 22, 23,
3934 24, 25, 26, 27, 28, 29, 30, 31,
3935 );
3936 #[rustfmt::skip]
3937 let b = _mm256_setr_epi8(
3938 0, 1, 2, 3, 4, 5, 6, 7,
3939 8, 9, 10, 11, 12, 13, 14, 15,
3940 16, 17, 18, 19, 20, 21, 22, 23,
3941 24, 25, 26, 27, 28, 29, 30, 31,
3942 );
3943 let r = _mm256_add_epi8(a, b);
3944 #[rustfmt::skip]
3945 let e = _mm256_setr_epi8(
3946 0, 2, 4, 6, 8, 10, 12, 14,
3947 16, 18, 20, 22, 24, 26, 28, 30,
3948 32, 34, 36, 38, 40, 42, 44, 46,
3949 48, 50, 52, 54, 56, 58, 60, 62,
3950 );
3951 assert_eq_m256i(r, e);
3952 }
3953
3954 #[simd_test(enable = "avx2")]
3955 unsafe fn test_mm256_adds_epi8() {
3956 #[rustfmt::skip]
3957 let a = _mm256_setr_epi8(
3958 0, 1, 2, 3, 4, 5, 6, 7,
3959 8, 9, 10, 11, 12, 13, 14, 15,
3960 16, 17, 18, 19, 20, 21, 22, 23,
3961 24, 25, 26, 27, 28, 29, 30, 31,
3962 );
3963 #[rustfmt::skip]
3964 let b = _mm256_setr_epi8(
3965 32, 33, 34, 35, 36, 37, 38, 39,
3966 40, 41, 42, 43, 44, 45, 46, 47,
3967 48, 49, 50, 51, 52, 53, 54, 55,
3968 56, 57, 58, 59, 60, 61, 62, 63,
3969 );
3970 let r = _mm256_adds_epi8(a, b);
3971 #[rustfmt::skip]
3972 let e = _mm256_setr_epi8(
3973 32, 34, 36, 38, 40, 42, 44, 46,
3974 48, 50, 52, 54, 56, 58, 60, 62,
3975 64, 66, 68, 70, 72, 74, 76, 78,
3976 80, 82, 84, 86, 88, 90, 92, 94,
3977 );
3978 assert_eq_m256i(r, e);
3979 }
3980
3981 #[simd_test(enable = "avx2")]
3982 unsafe fn test_mm256_adds_epi8_saturate_positive() {
3983 let a = _mm256_set1_epi8(0x7F);
3984 let b = _mm256_set1_epi8(1);
3985 let r = _mm256_adds_epi8(a, b);
3986 assert_eq_m256i(r, a);
3987 }
3988
3989 #[simd_test(enable = "avx2")]
3990 unsafe fn test_mm256_adds_epi8_saturate_negative() {
3991 let a = _mm256_set1_epi8(-0x80);
3992 let b = _mm256_set1_epi8(-1);
3993 let r = _mm256_adds_epi8(a, b);
3994 assert_eq_m256i(r, a);
3995 }
3996
3997 #[simd_test(enable = "avx2")]
3998 unsafe fn test_mm256_adds_epi16() {
3999 #[rustfmt::skip]
4000 let a = _mm256_setr_epi16(
4001 0, 1, 2, 3, 4, 5, 6, 7,
4002 8, 9, 10, 11, 12, 13, 14, 15,
4003 );
4004 #[rustfmt::skip]
4005 let b = _mm256_setr_epi16(
4006 32, 33, 34, 35, 36, 37, 38, 39,
4007 40, 41, 42, 43, 44, 45, 46, 47,
4008 );
4009 let r = _mm256_adds_epi16(a, b);
4010 #[rustfmt::skip]
4011 let e = _mm256_setr_epi16(
4012 32, 34, 36, 38, 40, 42, 44, 46,
4013 48, 50, 52, 54, 56, 58, 60, 62,
4014 );
4015
4016 assert_eq_m256i(r, e);
4017 }
4018
4019 #[simd_test(enable = "avx2")]
4020 unsafe fn test_mm256_adds_epi16_saturate_positive() {
4021 let a = _mm256_set1_epi16(0x7FFF);
4022 let b = _mm256_set1_epi16(1);
4023 let r = _mm256_adds_epi16(a, b);
4024 assert_eq_m256i(r, a);
4025 }
4026
4027 #[simd_test(enable = "avx2")]
4028 unsafe fn test_mm256_adds_epi16_saturate_negative() {
4029 let a = _mm256_set1_epi16(-0x8000);
4030 let b = _mm256_set1_epi16(-1);
4031 let r = _mm256_adds_epi16(a, b);
4032 assert_eq_m256i(r, a);
4033 }
4034
4035 #[simd_test(enable = "avx2")]
4036 unsafe fn test_mm256_adds_epu8() {
4037 #[rustfmt::skip]
4038 let a = _mm256_setr_epi8(
4039 0, 1, 2, 3, 4, 5, 6, 7,
4040 8, 9, 10, 11, 12, 13, 14, 15,
4041 16, 17, 18, 19, 20, 21, 22, 23,
4042 24, 25, 26, 27, 28, 29, 30, 31,
4043 );
4044 #[rustfmt::skip]
4045 let b = _mm256_setr_epi8(
4046 32, 33, 34, 35, 36, 37, 38, 39,
4047 40, 41, 42, 43, 44, 45, 46, 47,
4048 48, 49, 50, 51, 52, 53, 54, 55,
4049 56, 57, 58, 59, 60, 61, 62, 63,
4050 );
4051 let r = _mm256_adds_epu8(a, b);
4052 #[rustfmt::skip]
4053 let e = _mm256_setr_epi8(
4054 32, 34, 36, 38, 40, 42, 44, 46,
4055 48, 50, 52, 54, 56, 58, 60, 62,
4056 64, 66, 68, 70, 72, 74, 76, 78,
4057 80, 82, 84, 86, 88, 90, 92, 94,
4058 );
4059 assert_eq_m256i(r, e);
4060 }
4061
4062 #[simd_test(enable = "avx2")]
4063 unsafe fn test_mm256_adds_epu8_saturate() {
4064 let a = _mm256_set1_epi8(!0);
4065 let b = _mm256_set1_epi8(1);
4066 let r = _mm256_adds_epu8(a, b);
4067 assert_eq_m256i(r, a);
4068 }
4069
4070 #[simd_test(enable = "avx2")]
4071 unsafe fn test_mm256_adds_epu16() {
4072 #[rustfmt::skip]
4073 let a = _mm256_setr_epi16(
4074 0, 1, 2, 3, 4, 5, 6, 7,
4075 8, 9, 10, 11, 12, 13, 14, 15,
4076 );
4077 #[rustfmt::skip]
4078 let b = _mm256_setr_epi16(
4079 32, 33, 34, 35, 36, 37, 38, 39,
4080 40, 41, 42, 43, 44, 45, 46, 47,
4081 );
4082 let r = _mm256_adds_epu16(a, b);
4083 #[rustfmt::skip]
4084 let e = _mm256_setr_epi16(
4085 32, 34, 36, 38, 40, 42, 44, 46,
4086 48, 50, 52, 54, 56, 58, 60, 62,
4087 );
4088
4089 assert_eq_m256i(r, e);
4090 }
4091
4092 #[simd_test(enable = "avx2")]
4093 unsafe fn test_mm256_adds_epu16_saturate() {
4094 let a = _mm256_set1_epi16(!0);
4095 let b = _mm256_set1_epi16(1);
4096 let r = _mm256_adds_epu16(a, b);
4097 assert_eq_m256i(r, a);
4098 }
4099
4100 #[simd_test(enable = "avx2")]
4101 unsafe fn test_mm256_and_si256() {
4102 let a = _mm256_set1_epi8(5);
4103 let b = _mm256_set1_epi8(3);
4104 let got = _mm256_and_si256(a, b);
4105 assert_eq_m256i(got, _mm256_set1_epi8(1));
4106 }
4107
4108 #[simd_test(enable = "avx2")]
4109 unsafe fn test_mm256_andnot_si256() {
4110 let a = _mm256_set1_epi8(5);
4111 let b = _mm256_set1_epi8(3);
4112 let got = _mm256_andnot_si256(a, b);
4113 assert_eq_m256i(got, _mm256_set1_epi8(2));
4114 }
4115
4116 #[simd_test(enable = "avx2")]
4117 unsafe fn test_mm256_avg_epu8() {
4118 let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9));
4119 let r = _mm256_avg_epu8(a, b);
4120 assert_eq_m256i(r, _mm256_set1_epi8(6));
4121 }
4122
4123 #[simd_test(enable = "avx2")]
4124 unsafe fn test_mm256_avg_epu16() {
4125 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4126 let r = _mm256_avg_epu16(a, b);
4127 assert_eq_m256i(r, _mm256_set1_epi16(6));
4128 }
4129
4130 #[simd_test(enable = "avx2")]
4131 unsafe fn test_mm_blend_epi32() {
4132 let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
4133 let e = _mm_setr_epi32(9, 3, 3, 3);
4134 let r = _mm_blend_epi32::<0x01>(a, b);
4135 assert_eq_m128i(r, e);
4136
4137 let r = _mm_blend_epi32::<0x0E>(b, a);
4138 assert_eq_m128i(r, e);
4139 }
4140
4141 #[simd_test(enable = "avx2")]
4142 unsafe fn test_mm256_blend_epi32() {
4143 let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
4144 let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
4145 let r = _mm256_blend_epi32::<0x01>(a, b);
4146 assert_eq_m256i(r, e);
4147
4148 let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9);
4149 let r = _mm256_blend_epi32::<0x82>(a, b);
4150 assert_eq_m256i(r, e);
4151
4152 let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3);
4153 let r = _mm256_blend_epi32::<0x7C>(a, b);
4154 assert_eq_m256i(r, e);
4155 }
4156
4157 #[simd_test(enable = "avx2")]
4158 unsafe fn test_mm256_blend_epi16() {
4159 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4160 let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
4161 let r = _mm256_blend_epi16::<0x01>(a, b);
4162 assert_eq_m256i(r, e);
4163
4164 let r = _mm256_blend_epi16::<0xFE>(b, a);
4165 assert_eq_m256i(r, e);
4166 }
4167
4168 #[simd_test(enable = "avx2")]
4169 unsafe fn test_mm256_blendv_epi8() {
4170 let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2));
4171 let mask = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1);
4172 let e = _mm256_insert_epi8::<2>(_mm256_set1_epi8(4), 2);
4173 let r = _mm256_blendv_epi8(a, b, mask);
4174 assert_eq_m256i(r, e);
4175 }
4176
4177 #[simd_test(enable = "avx2")]
4178 unsafe fn test_mm_broadcastb_epi8() {
4179 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4180 let res = _mm_broadcastb_epi8(a);
4181 assert_eq_m128i(res, _mm_set1_epi8(0x2a));
4182 }
4183
4184 #[simd_test(enable = "avx2")]
4185 unsafe fn test_mm256_broadcastb_epi8() {
4186 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4187 let res = _mm256_broadcastb_epi8(a);
4188 assert_eq_m256i(res, _mm256_set1_epi8(0x2a));
4189 }
4190
4191 #[simd_test(enable = "avx2")]
4192 unsafe fn test_mm_broadcastd_epi32() {
4193 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4194 let res = _mm_broadcastd_epi32(a);
4195 assert_eq_m128i(res, _mm_set1_epi32(0x2a));
4196 }
4197
4198 #[simd_test(enable = "avx2")]
4199 unsafe fn test_mm256_broadcastd_epi32() {
4200 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4201 let res = _mm256_broadcastd_epi32(a);
4202 assert_eq_m256i(res, _mm256_set1_epi32(0x2a));
4203 }
4204
4205 #[simd_test(enable = "avx2")]
4206 unsafe fn test_mm_broadcastq_epi64() {
4207 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4208 let res = _mm_broadcastq_epi64(a);
4209 assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff));
4210 }
4211
4212 #[simd_test(enable = "avx2")]
4213 unsafe fn test_mm256_broadcastq_epi64() {
4214 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4215 let res = _mm256_broadcastq_epi64(a);
4216 assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff));
4217 }
4218
4219 #[simd_test(enable = "avx2")]
4220 unsafe fn test_mm_broadcastsd_pd() {
        let a = _mm_setr_pd(6.88, 3.44);
        let res = _mm_broadcastsd_pd(a);
        assert_eq_m128d(res, _mm_set1_pd(6.88));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastsd_pd() {
        let a = _mm_setr_pd(6.88, 3.44);
        let res = _mm256_broadcastsd_pd(a);
        assert_eq_m256d(res, _mm256_set1_pd(6.88f64));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastsi128_si256() {
        let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
        let res = _mm_broadcastsi128_si256(a);
        let retval = _mm256_setr_epi64x(
            0x0987654321012334,
            0x5678909876543210,
            0x0987654321012334,
            0x5678909876543210,
        );
        assert_eq_m256i(res, retval);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastsi128_si256() {
        let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
        let res = _mm256_broadcastsi128_si256(a);
        let retval = _mm256_setr_epi64x(
            0x0987654321012334,
            0x5678909876543210,
            0x0987654321012334,
            0x5678909876543210,
        );
        assert_eq_m256i(res, retval);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastss_ps() {
        let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
        let res = _mm_broadcastss_ps(a);
        assert_eq_m128(res, _mm_set1_ps(6.88));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastss_ps() {
        let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
        let res = _mm256_broadcastss_ps(a);
        assert_eq_m256(res, _mm256_set1_ps(6.88));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastw_epi16() {
        let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
        let res = _mm_broadcastw_epi16(a);
        assert_eq_m128i(res, _mm_set1_epi16(0x22b));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastw_epi16() {
        let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
        let res = _mm256_broadcastw_epi16(a);
        assert_eq_m256i(res, _mm256_set1_epi16(0x22b));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpeq_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            31, 30, 2, 28, 27, 26, 25, 24,
            23, 22, 21, 20, 19, 18, 17, 16,
            15, 14, 13, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        let r = _mm256_cmpeq_epi8(a, b);
        assert_eq_m256i(r, _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), !0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpeq_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            15, 14, 2, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        let r = _mm256_cmpeq_epi16(a, b);
        assert_eq_m256i(r, _mm256_insert_epi16::<2>(_mm256_set1_epi16(0), !0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpeq_epi32() {
        let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0);
        let r = _mm256_cmpeq_epi32(a, b);
        let e = _mm256_set1_epi32(0);
        let e = _mm256_insert_epi32::<2>(e, !0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpeq_epi64() {
        let a = _mm256_setr_epi64x(0, 1, 2, 3);
        let b = _mm256_setr_epi64x(3, 2, 2, 0);
        let r = _mm256_cmpeq_epi64(a, b);
        assert_eq_m256i(r, _mm256_insert_epi64::<2>(_mm256_set1_epi64x(0), !0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpgt_epi8() {
        let a = _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), 5);
        let b = _mm256_set1_epi8(0);
        let r = _mm256_cmpgt_epi8(a, b);
        assert_eq_m256i(r, _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), !0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpgt_epi16() {
        let a = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 5);
        let b = _mm256_set1_epi16(0);
        let r = _mm256_cmpgt_epi16(a, b);
        assert_eq_m256i(r, _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), !0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpgt_epi32() {
        let a = _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), 5);
        let b = _mm256_set1_epi32(0);
        let r = _mm256_cmpgt_epi32(a, b);
        assert_eq_m256i(r, _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), !0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpgt_epi64() {
        let a = _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), 5);
        let b = _mm256_set1_epi64x(0);
        let r = _mm256_cmpgt_epi64(a, b);
        assert_eq_m256i(r, _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), !0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi8_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        #[rustfmt::skip]
        let r = _mm256_setr_epi16(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        assert_eq_m256i(r, _mm256_cvtepi8_epi16(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi8_epi32() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
        assert_eq_m256i(r, _mm256_cvtepi8_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi8_epi64() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi8_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi16_epi32() {
        let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
        let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
        assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi16_epi64() {
        let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi32_epi64() {
        let a = _mm_setr_epi32(0, 0, -1, 1);
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu16_epi32() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu16_epi64() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu32_epi64() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu8_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let r = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m256i(r, _mm256_cvtepu8_epi16(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu8_epi32() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, _mm256_cvtepu8_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu8_epi64() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu8_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_extracti128_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_extracti128_si256::<1>(a);
        let e = _mm_setr_epi64x(3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hadd_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hadd_epi16(a, b);
        let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hadd_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_hadd_epi32(a, b);
        let e = _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hadds_epi16() {
        let a = _mm256_set1_epi16(2);
        let a = _mm256_insert_epi16::<0>(a, 0x7fff);
        let a = _mm256_insert_epi16::<1>(a, 1);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hadds_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0x7FFF, 4, 4, 4, 8, 8, 8, 8,
            4, 4, 4, 4, 8, 8, 8, 8,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hsub_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hsub_epi16(a, b);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hsub_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_hsub_epi32(a, b);
        let e = _mm256_set1_epi32(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hsubs_epi16() {
        let a = _mm256_set1_epi16(2);
        let a = _mm256_insert_epi16::<0>(a, 0x7fff);
        let a = _mm256_insert_epi16::<1>(a, -1);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hsubs_epi16(a, b);
        let e = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 0x7FFF);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_madd_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_madd_epi16(a, b);
        let e = _mm256_set1_epi32(16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_inserti128_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let b = _mm_setr_epi64x(7, 8);
        let r = _mm256_inserti128_si256::<1>(a, b);
        let e = _mm256_setr_epi64x(1, 2, 7, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maddubs_epi16() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_maddubs_epi16(a, b);
        let e = _mm256_set1_epi16(16);
        assert_eq_m256i(r, e);
    }

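    // Note for the maskload/maskstore tests below: only the sign bit of each
    // mask element matters, so elements with a negative mask are loaded (others
    // read as zero) or stored (others are left untouched in memory).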
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_maskload_epi32() {
        let nums = [1, 2, 3, 4];
        let a = &nums as *const i32;
        let mask = _mm_setr_epi32(-1, 0, 0, -1);
        let r = _mm_maskload_epi32(a, mask);
        let e = _mm_setr_epi32(1, 0, 0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maskload_epi32() {
        let nums = [1, 2, 3, 4, 5, 6, 7, 8];
        let a = &nums as *const i32;
        let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
        let r = _mm256_maskload_epi32(a, mask);
        let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_maskload_epi64() {
        let nums = [1_i64, 2_i64];
        let a = &nums as *const i64;
        let mask = _mm_setr_epi64x(0, -1);
        let r = _mm_maskload_epi64(a, mask);
        let e = _mm_setr_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maskload_epi64() {
        let nums = [1_i64, 2_i64, 3_i64, 4_i64];
        let a = &nums as *const i64;
        let mask = _mm256_setr_epi64x(0, -1, -1, 0);
        let r = _mm256_maskload_epi64(a, mask);
        let e = _mm256_setr_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_maskstore_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let mut arr = [-1, -1, -1, -1];
        let mask = _mm_setr_epi32(-1, 0, 0, -1);
        _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
        let e = [1, -1, -1, 4];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maskstore_epi32() {
        let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
        let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
        let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
        _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
        let e = [1, -1, -1, 42, -1, 6, 7, -1];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_maskstore_epi64() {
        let a = _mm_setr_epi64x(1_i64, 2_i64);
        let mut arr = [-1_i64, -1_i64];
        let mask = _mm_setr_epi64x(0, -1);
        _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
        let e = [-1, 2];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maskstore_epi64() {
        let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
        let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
        let mask = _mm256_setr_epi64x(0, -1, -1, 0);
        _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
        let e = [-1, 2, 3, -1];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_max_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_max_epi32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_max_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epu16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_max_epu16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epu32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_max_epu32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_max_epu8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_min_epi16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_min_epi32(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_min_epi8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epu16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_min_epu16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epu32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_min_epu32(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_min_epu8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_movemask_epi8() {
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_movemask_epi8(a);
        let e = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mpsadbw_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_mpsadbw_epu8::<0>(a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mul_epi32() {
        let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_mul_epi32(a, b);
        let e = _mm256_setr_epi64x(0, 0, 10, 14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mul_epu32() {
        let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_mul_epu32(a, b);
        let e = _mm256_setr_epi64x(0, 0, 10, 14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mulhi_epi16() {
        let a = _mm256_set1_epi16(6535);
        let b = _mm256_set1_epi16(6535);
        let r = _mm256_mulhi_epi16(a, b);
        let e = _mm256_set1_epi16(651);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mulhi_epu16() {
        let a = _mm256_set1_epi16(6535);
        let b = _mm256_set1_epi16(6535);
        let r = _mm256_mulhi_epu16(a, b);
        let e = _mm256_set1_epi16(651);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mullo_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_mullo_epi16(a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mullo_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_mullo_epi32(a, b);
        let e = _mm256_set1_epi32(8);
        assert_eq_m256i(r, e);
    }

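    // _mm256_mulhrs_epi16 forms the signed 32-bit product of each 16-bit lane
    // and returns (((a * b) >> 14) + 1) >> 1, i.e. the product scaled by 2^-15
    // with rounding.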
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mulhrs_epi16() {
        // 16384 * 8192 = 2^27; scaled by 2^-15 and rounded this is exactly 4096.
        let a = _mm256_set1_epi16(16384);
        let b = _mm256_set1_epi16(8192);
        let r = _mm256_mulhrs_epi16(a, b);
        let e = _mm256_set1_epi16(4096);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_or_si256() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(0);
        let r = _mm256_or_si256(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packs_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_packs_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packs_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_packs_epi32(a, b);
        let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packus_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_packus_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packus_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_packus_epi32(a, b);
        let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_sad_epu8(a, b);
        let e = _mm256_set1_epi64x(16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 11, 22, 33, 44,
            4, 5, 6, 7, 55, 66, 77, 88,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 1, 2, 3, 44, 22, 22, 11,
            4, 5, 6, 7, 88, 66, 66, 55,
        );
        let r = _mm256_shufflehi_epi16::<0b00_01_01_11>(a);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            11, 22, 33, 44, 0, 1, 2, 3,
            55, 66, 77, 88, 4, 5, 6, 7,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            44, 22, 22, 11, 0, 1, 2, 3,
            88, 66, 66, 55, 4, 5, 6, 7,
        );
        let r = _mm256_shufflelo_epi16::<0b00_01_01_11>(a);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(-1);
        let r = _mm256_sign_epi16(a, b);
        let e = _mm256_set1_epi16(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_sign_epi32(a, b);
        let e = _mm256_set1_epi32(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let r = _mm256_sign_epi8(a, b);
        let e = _mm256_set1_epi8(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi16() {
        let a = _mm256_set1_epi16(0xFF);
        let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
        let r = _mm256_sll_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(0xFF0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi32() {
        let a = _mm256_set1_epi32(0xFFFF);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
        let r = _mm256_sll_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi64() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4);
        let r = _mm256_sll_epi64(a, b);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_epi16() {
        assert_eq_m256i(
            _mm256_slli_epi16::<4>(_mm256_set1_epi16(0xFF)),
            _mm256_set1_epi16(0xFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_epi32() {
        assert_eq_m256i(
            _mm256_slli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
            _mm256_set1_epi32(0xFFFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_epi64() {
        assert_eq_m256i(
            _mm256_slli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
            _mm256_set1_epi64x(0xFFFFFFFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_si256() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let r = _mm256_slli_si256::<3>(a);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_sllv_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(1);
        let r = _mm_sllv_epi32(a, b);
        let e = _mm_set1_epi32(4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sllv_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_sllv_epi32(a, b);
        let e = _mm256_set1_epi32(4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_sllv_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(1);
        let r = _mm_sllv_epi64(a, b);
        let e = _mm_set1_epi64x(4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sllv_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(1);
        let r = _mm256_sllv_epi64(a, b);
        let e = _mm256_set1_epi64x(4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sra_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm256_sra_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(-1));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sra_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1);
        let r = _mm256_sra_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(-1));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srai_epi16() {
        assert_eq_m256i(
            _mm256_srai_epi16::<1>(_mm256_set1_epi16(-1)),
            _mm256_set1_epi16(-1),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srai_epi32() {
        assert_eq_m256i(
            _mm256_srai_epi32::<1>(_mm256_set1_epi32(-1)),
            _mm256_set1_epi32(-1),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_srav_epi32() {
        let a = _mm_set1_epi32(4);
        let count = _mm_set1_epi32(1);
        let r = _mm_srav_epi32(a, count);
        let e = _mm_set1_epi32(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srav_epi32() {
        let a = _mm256_set1_epi32(4);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_srav_epi32(a, count);
        let e = _mm256_set1_epi32(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_si256() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_srli_si256::<3>(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            4, 5, 6, 7, 8, 9, 10, 11,
            12, 13, 14, 15, 16, 0, 0, 0,
            20, 21, 22, 23, 24, 25, 26, 27,
            28, 29, 30, 31, 32, 0, 0, 0,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi16() {
        let a = _mm256_set1_epi16(0xFF);
        let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
        let r = _mm256_srl_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(0xF));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi32() {
        let a = _mm256_set1_epi32(0xFFFF);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
        let r = _mm256_srl_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(0xFFF));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi64() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let b = _mm_setr_epi64x(4, 0);
        let r = _mm256_srl_epi64(a, b);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFF));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_epi16() {
        assert_eq_m256i(
            _mm256_srli_epi16::<4>(_mm256_set1_epi16(0xFF)),
            _mm256_set1_epi16(0xF),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_epi32() {
        assert_eq_m256i(
            _mm256_srli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
            _mm256_set1_epi32(0xFFF),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_epi64() {
        assert_eq_m256i(
            _mm256_srli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
            _mm256_set1_epi64x(0xFFFFFFF),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_srlv_epi32() {
        let a = _mm_set1_epi32(2);
        let count = _mm_set1_epi32(1);
        let r = _mm_srlv_epi32(a, count);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srlv_epi32() {
        let a = _mm256_set1_epi32(2);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_srlv_epi32(a, count);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_srlv_epi64() {
        let a = _mm_set1_epi64x(2);
        let count = _mm_set1_epi64x(1);
        let r = _mm_srlv_epi64(a, count);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srlv_epi64() {
        let a = _mm256_set1_epi64x(2);
        let count = _mm256_set1_epi64x(1);
        let r = _mm256_srlv_epi64(a, count);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

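    // _mm256_stream_load_si256 (VMOVNTDQA) requires a 32-byte-aligned source;
    // taking the address of a __m256i local satisfies that because the type's
    // alignment is 32 bytes.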
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_stream_load_si256() {
        let a = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_stream_load_si256(core::ptr::addr_of!(a) as *const _);
        assert_eq_m256i(a, r);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sub_epi16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_sub_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sub_epi32() {
        let a = _mm256_set1_epi32(4);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_sub_epi32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sub_epi64() {
        let a = _mm256_set1_epi64x(4);
        let b = _mm256_set1_epi64x(2);
        let r = _mm256_sub_epi64(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sub_epi8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_sub_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_subs_epi16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_subs_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_subs_epi8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_subs_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_subs_epu16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_subs_epu16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_subs_epu8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_subs_epu8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_xor_si256() {
        let a = _mm256_set1_epi8(5);
        let b = _mm256_set1_epi8(3);
        let r = _mm256_xor_si256(a, b);
        assert_eq_m256i(r, _mm256_set1_epi8(6));
    }

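    // _mm256_alignr_epi8 works on each pair of 16-byte lanes independently: a
    // count of 16 returns `a`, counts of 32 or more return zero, counts below
    // 16 shift bytes out of the `a:b` lane concatenation, and counts of
    // 17..=31 shift `a` with zero fill.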
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            -1, -2, -3, -4, -5, -6, -7, -8,
            -9, -10, -11, -12, -13, -14, -15, -16,
            -17, -18, -19, -20, -21, -22, -23, -24,
            -25, -26, -27, -28, -29, -30, -31, -32,
        );
        let r = _mm256_alignr_epi8::<33>(a, b);
        assert_eq_m256i(r, _mm256_set1_epi8(0));

        let r = _mm256_alignr_epi8::<17>(a, b);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
            18, 19, 20, 21, 22, 23, 24, 25,
            26, 27, 28, 29, 30, 31, 32, 0,
        );
        assert_eq_m256i(r, expected);

        let r = _mm256_alignr_epi8::<4>(a, b);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            -5, -6, -7, -8, -9, -10, -11, -12,
            -13, -14, -15, -16, 1, 2, 3, 4,
            -21, -22, -23, -24, -25, -26, -27, -28,
            -29, -30, -31, -32, 17, 18, 19, 20,
        );
        assert_eq_m256i(r, expected);

        let r = _mm256_alignr_epi8::<15>(a, b);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            -16, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            -32, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        assert_eq_m256i(r, expected);

        let r = _mm256_alignr_epi8::<0>(a, b);
        assert_eq_m256i(r, b);

        let r = _mm256_alignr_epi8::<16>(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
            12, 5, 5, 10, 4, 1, 8, 0,
            4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
            12, 5, 5, 10, 4, 1, 8, 0,
        );
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            5, 0, 5, 4, 9, 13, 7, 4,
            13, 6, 6, 11, 5, 2, 9, 1,
            21, 0, 21, 20, 25, 29, 23, 20,
            29, 22, 22, 27, 21, 18, 25, 17,
        );
        let r = _mm256_shuffle_epi8(a, b);
        assert_eq_m256i(r, expected);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permutevar8x32_epi32() {
        let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
        let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
        let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500);
        let r = _mm256_permutevar8x32_epi32(a, b);
        assert_eq_m256i(r, expected);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permute4x64_epi64() {
        let a = _mm256_setr_epi64x(100, 200, 300, 400);
        let expected = _mm256_setr_epi64x(400, 100, 200, 100);
        let r = _mm256_permute4x64_epi64::<0b00010011>(a);
        assert_eq_m256i(r, expected);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permute2x128_si256() {
        let a = _mm256_setr_epi64x(100, 200, 500, 600);
        let b = _mm256_setr_epi64x(300, 400, 700, 800);
        let r = _mm256_permute2x128_si256::<0b00_01_00_11>(a, b);
        let e = _mm256_setr_epi64x(700, 800, 500, 600);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permute4x64_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_permute4x64_pd::<0b00_01_00_11>(a);
        let e = _mm256_setr_pd(4., 1., 2., 1.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permutevar8x32_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
        let r = _mm256_permutevar8x32_ps(a, b);
        let e = _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.);
        assert_eq_m256(r, e);
    }

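    // For the gather tests below, the const SCALE parameter is a byte stride
    // multiplied into each index (it must be 1, 2, 4, or 8), so 4 steps through
    // i32/f32 elements and 8 steps through i64/f64 elements.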
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        // A multiplier of 4 is word-addressing
        let r = _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        // A multiplier of 4 is word-addressing
        let r = _mm_mask_i32gather_epi32::<4>(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm_setr_epi32(-1, -1, -1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        // A multiplier of 4 is word-addressing
        let r =
            _mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
        assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        // A multiplier of 4 is word-addressing
        let r = _mm256_mask_i32gather_epi32::<4>(
            _mm256_set1_epi32(256),
            arr.as_ptr(),
            _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
            _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
        );
        assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm_mask_i32gather_ps::<4>(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        // A multiplier of 4 is word-addressing for f32s
        let r =
            _mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
        assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm256_mask_i32gather_ps::<4>(
            _mm256_set1_ps(256.0),
            arr.as_ptr(),
            _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
            _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
        );
        assert_eq_m256(
            r,
            _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
        assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm_mask_i32gather_epi64::<8>(
            _mm_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi32(16, 16, 16, 16),
            _mm_setr_epi64x(-1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm256_mask_i32gather_epi64::<8>(
            _mm256_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm256_setr_epi64x(-1, -1, -1, 0),
        );
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
        assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm_mask_i32gather_pd::<8>(
            _mm_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(16, 16, 16, 16),
            _mm_setr_pd(-1.0, 0.0),
        );
        assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm256_mask_i32gather_pd::<8>(
            _mm256_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
        );
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        // A multiplier of 4 is word-addressing
        let r = _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        // A multiplier of 4 is word-addressing
        let r = _mm_mask_i64gather_epi32::<4>(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm_setr_epi64x(0, 16),
            _mm_setr_epi32(-1, 0, -1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        // A multiplier of 4 is word-addressing
        let r = _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        // A multiplier of 4 is word-addressing
        let r = _mm256_mask_i64gather_epi32::<4>(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm_setr_epi32(-1, -1, -1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm_mask_i64gather_ps::<4>(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm_setr_epi64x(0, 16),
            _mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm256_mask_i64gather_ps::<4>(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
        assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm_mask_i64gather_epi64::<8>(
            _mm_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi64x(16, 16),
            _mm_setr_epi64x(-1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm256_mask_i64gather_epi64::<8>(
            _mm256_set1_epi64x(256),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm256_setr_epi64x(-1, -1, -1, 0),
        );
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
        assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm_mask_i64gather_pd::<8>(
            _mm_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi64x(16, 16),
            _mm_setr_pd(-1.0, 0.0),
        );
        assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm256_mask_i64gather_pd::<8>(
            _mm256_set1_pd(256.0),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
        );
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_extract_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            -1, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31
        );
        let r1 = _mm256_extract_epi8::<0>(a);
        let r2 = _mm256_extract_epi8::<3>(a);
        assert_eq!(r1, 0xFF);
        assert_eq!(r2, 3);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_extract_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            -1, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r1 = _mm256_extract_epi16::<0>(a);
        let r2 = _mm256_extract_epi16::<3>(a);
        assert_eq!(r1, 0xFFFF);
        assert_eq!(r2, 3);
    }
}