//! Advanced Vector Extensions 2 (AVX2)
//!
//! AVX2 expands most AVX commands to 256-bit wide vector registers and
//! adds [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate).
//!
//! The references are:
//!
//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
//!   Instruction Set Reference, A-Z][intel64_ref].
//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
//!   System Instructions][amd64_ref].
//!
//! Wikipedia's [AVX][wiki_avx] and [FMA][wiki_fma] pages provide a quick
//! overview of the instructions available.
//!
//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
//! [amd64_ref]: https://docs.amd.com/v/u/en-US/24594_3.37
//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate

use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;

#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi32)
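///
/// A minimal usage sketch (illustrative only; it assumes an x86_64 target and
/// that AVX2 has been detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi32(-7);
///             // Every 32-bit lane becomes its absolute value.
///             let lanes: [i32; 8] = core::mem::transmute(_mm256_abs_epi32(a));
///             assert_eq!(lanes, [7; 8]);
///         }
///     }
/// }
/// ```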
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_abs_epi32(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let r = simd_select::<m32x8, _>(simd_lt(a, i32x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute values of packed 16-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_abs_epi16(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let r = simd_select::<m16x16, _>(simd_lt(a, i16x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute values of packed 8-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_abs_epi8(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let r = simd_select::<m8x32, _>(simd_lt(a, i8x32::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Adds packed 64-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi64)
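///
/// A minimal usage sketch (illustrative only; it assumes an x86_64 target and
/// that AVX2 has been detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_setr_epi64x(1, 2, 3, 4);
///             let b = _mm256_setr_epi64x(10, 20, 30, 40);
///             // Lane-wise wrapping addition of four 64-bit integers.
///             let lanes: [i64; 4] = core::mem::transmute(_mm256_add_epi64(a, b));
///             assert_eq!(lanes, [11, 22, 33, 44]);
///         }
///     }
/// }
/// ```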
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i64x4(), b.as_i64x4())) }
}

/// Adds packed 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i32x8(), b.as_i32x8())) }
}

/// Adds packed 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i16x16(), b.as_i16x16())) }
}

/// Adds packed 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i8x32(), b.as_i8x32())) }
}

/// Adds packed 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epi8)
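///
/// A minimal usage sketch (illustrative only; it assumes an x86_64 target and
/// that AVX2 has been detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi8(120);
///             let b = _mm256_set1_epi8(20);
///             // 120 + 20 overflows i8, so every lane saturates to i8::MAX.
///             let lanes: [i8; 32] = core::mem::transmute(_mm256_adds_epi8(a, b));
///             assert_eq!(lanes, [127; 32]);
///         }
///     }
/// }
/// ```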
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32())) }
}

/// Adds packed 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16())) }
}

/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epu8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32())) }
}

/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epu16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16())) }
}

/// Concatenates pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary
/// result, shifts the result right by `IMM8` bytes, and returns the low 16 bytes.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi8)
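///
/// A minimal usage sketch (illustrative only; it assumes an x86_64 target and
/// that AVX2 has been detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi8(1);
///             let b = _mm256_set1_epi8(2);
///             // Within each 128-bit lane, the result is the top 12 bytes of
///             // `b` followed by the low 4 bytes of `a`.
///             let lanes: [i8; 32] = core::mem::transmute(_mm256_alignr_epi8::<4>(a, b));
///             assert_eq!(lanes[0], 2);
///             assert_eq!(lanes[12], 1);
///         }
///     }
/// }
/// ```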
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);

    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if IMM8 >= 32 {
        return _mm256_setzero_si256();
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b) = if IMM8 > 16 {
        (_mm256_setzero_si256(), a)
    } else {
        (a, b)
    };
    unsafe {
        if IMM8 == 16 {
            return transmute(a);
        }
    }
    const fn mask(shift: u32, i: u32) -> u32 {
        let shift = shift % 16;
        let mod_i = i % 16;
        if mod_i < (16 - shift) {
            i + shift
        } else {
            i + 16 + shift
        }
    }

    unsafe {
        let r: i8x32 = simd_shuffle!(
            b.as_i8x32(),
            a.as_i8x32(),
            [
                mask(IMM8 as u32, 0),
                mask(IMM8 as u32, 1),
                mask(IMM8 as u32, 2),
                mask(IMM8 as u32, 3),
                mask(IMM8 as u32, 4),
                mask(IMM8 as u32, 5),
                mask(IMM8 as u32, 6),
                mask(IMM8 as u32, 7),
                mask(IMM8 as u32, 8),
                mask(IMM8 as u32, 9),
                mask(IMM8 as u32, 10),
                mask(IMM8 as u32, 11),
                mask(IMM8 as u32, 12),
                mask(IMM8 as u32, 13),
                mask(IMM8 as u32, 14),
                mask(IMM8 as u32, 15),
                mask(IMM8 as u32, 16),
                mask(IMM8 as u32, 17),
                mask(IMM8 as u32, 18),
                mask(IMM8 as u32, 19),
                mask(IMM8 as u32, 20),
                mask(IMM8 as u32, 21),
                mask(IMM8 as u32, 22),
                mask(IMM8 as u32, 23),
                mask(IMM8 as u32, 24),
                mask(IMM8 as u32, 25),
                mask(IMM8 as u32, 26),
                mask(IMM8 as u32, 27),
                mask(IMM8 as u32, 28),
                mask(IMM8 as u32, 29),
                mask(IMM8 as u32, 30),
                mask(IMM8 as u32, 31),
            ],
        );
        transmute(r)
    }
}

/// Computes the bitwise AND of 256 bits (representing integer data)
/// in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_and(a.as_i64x4(), b.as_i64x4())) }
}

/// Computes the bitwise NOT of 256 bits (representing integer data)
/// in `a` and then AND with `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_si256)
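///
/// A minimal usage sketch (illustrative only; it assumes an x86_64 target and
/// that AVX2 has been detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi8(0b0101_0101);
///             let b = _mm256_set1_epi8(0b0011_0011);
///             // (!a) & b keeps only the bits of `b` that are clear in `a`.
///             let lanes: [u8; 32] = core::mem::transmute(_mm256_andnot_si256(a, b));
///             assert_eq!(lanes, [0b0010_0010; 32]);
///         }
///     }
/// }
/// ```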
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let all_ones = _mm256_set1_epi8(-1);
        transmute(simd_and(
            simd_xor(a.as_i64x4(), all_ones.as_i64x4()),
            b.as_i64x4(),
        ))
    }
}

/// Averages packed unsigned 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_avg_epu16)
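///
/// A minimal usage sketch (illustrative only; it assumes an x86_64 target and
/// that AVX2 has been detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi16(5);
///             let b = _mm256_set1_epi16(8);
///             // Rounded average: (5 + 8 + 1) >> 1 == 7 in every lane.
///             let lanes: [u16; 16] = core::mem::transmute(_mm256_avg_epu16(a, b));
///             assert_eq!(lanes, [7; 16]);
///         }
///     }
/// }
/// ```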
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u32x16>(a.as_u16x16());
        let b = simd_cast::<_, u32x16>(b.as_u16x16());
        let r = simd_shr(simd_add(simd_add(a, b), u32x16::splat(1)), u32x16::splat(1));
        transmute(simd_cast::<_, u16x16>(r))
    }
}

/// Averages packed unsigned 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_avg_epu8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u16x32>(a.as_u8x32());
        let b = simd_cast::<_, u16x32>(b.as_u8x32());
        let r = simd_shr(simd_add(simd_add(a, b), u16x32::splat(1)), u16x32::splat(1));
        transmute(simd_cast::<_, u8x32>(r))
    }
}

/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM4`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM4, 4);
    unsafe {
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        let r: i32x4 = simd_shuffle!(
            a,
            b,
            [
                [0, 4, 0, 4][IMM4 as usize & 0b11],
                [1, 1, 5, 5][IMM4 as usize & 0b11],
                [2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11],
                [3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11],
            ],
        );
        transmute(r)
    }
}

/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_epi32)
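///
/// A minimal usage sketch (illustrative only; it assumes an x86_64 target and
/// that AVX2 has been detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi32(0);
///             let b = _mm256_set1_epi32(1);
///             // Bit i of the mask selects lane i from `b`; here lanes 0 and 3.
///             let lanes: [i32; 8] =
///                 core::mem::transmute(_mm256_blend_epi32::<0b0000_1001>(a, b));
///             assert_eq!(lanes, [1, 0, 0, 1, 0, 0, 0, 0]);
///         }
///     }
/// }
/// ```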
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        let r: i32x8 = simd_shuffle!(
            a,
            b,
            [
                [0, 8, 0, 8][IMM8 as usize & 0b11],
                [1, 1, 9, 9][IMM8 as usize & 0b11],
                [2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 11, 11][(IMM8 as usize >> 2) & 0b11],
                [4, 12, 4, 12][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 13, 13][(IMM8 as usize >> 4) & 0b11],
                [6, 14, 6, 14][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 15, 15][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}

/// Blends packed 16-bit integers from `a` and `b` using control mask `IMM8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendw, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();

        let r: i16x16 = simd_shuffle!(
            a,
            b,
            [
                [0, 16, 0, 16][IMM8 as usize & 0b11],
                [1, 1, 17, 17][IMM8 as usize & 0b11],
                [2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 19, 19][(IMM8 as usize >> 2) & 0b11],
                [4, 20, 4, 20][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 21, 21][(IMM8 as usize >> 4) & 0b11],
                [6, 22, 6, 22][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 23, 23][(IMM8 as usize >> 6) & 0b11],
                [8, 24, 8, 24][IMM8 as usize & 0b11],
                [9, 9, 25, 25][IMM8 as usize & 0b11],
                [10, 26, 10, 26][(IMM8 as usize >> 2) & 0b11],
                [11, 11, 27, 27][(IMM8 as usize >> 2) & 0b11],
                [12, 28, 12, 28][(IMM8 as usize >> 4) & 0b11],
                [13, 13, 29, 29][(IMM8 as usize >> 4) & 0b11],
                [14, 30, 14, 30][(IMM8 as usize >> 6) & 0b11],
                [15, 15, 31, 31][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}

/// Blends packed 8-bit integers from `a` and `b` using `mask`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_epi8)
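///
/// A minimal usage sketch (illustrative only; it assumes an x86_64 target and
/// that AVX2 has been detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi8(1);
///             let b = _mm256_set1_epi8(2);
///             // A byte is taken from `b` only where the corresponding mask
///             // byte has its most significant bit set.
///             let mask = _mm256_set1_epi8(i8::MIN);
///             let lanes: [i8; 32] = core::mem::transmute(_mm256_blendv_epi8(a, b, mask));
///             assert_eq!(lanes, [2; 32]);
///         }
///     }
/// }
/// ```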
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
    unsafe {
        let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::ZERO);
        transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32()))
    }
}

/// Broadcasts the low packed 8-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastb_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 16]);
        transmute::<i8x16, _>(ret)
    }
}

/// Broadcasts the low packed 8-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastb_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 32]);
        transmute::<i8x32, _>(ret)
    }
}

// N.B., `simd_shuffle4` with integer data types for `a` and `b` is
// often compiled to `vbroadcastss`.
/// Broadcasts the low packed 32-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastd_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 4]);
        transmute::<i32x4, _>(ret)
    }
}

// N.B., `simd_shuffle4` with integer data types for `a` and `b` is
// often compiled to `vbroadcastss`.
/// Broadcasts the low packed 32-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastd_epi32)
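///
/// A minimal usage sketch (illustrative only; it assumes an x86_64 target and
/// that AVX2 has been detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm_setr_epi32(7, 1, 2, 3);
///             // The lowest 32-bit element is replicated into all eight lanes.
///             let lanes: [i32; 8] = core::mem::transmute(_mm256_broadcastd_epi32(a));
///             assert_eq!(lanes, [7; 8]);
///         }
///     }
/// }
/// ```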
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 8]);
        transmute::<i32x8, _>(ret)
    }
}

/// Broadcasts the low packed 64-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastq_epi64)
#[inline]
#[target_feature(enable = "avx2")]
// Emits `vmovddup` instead of `vpbroadcastq`
// See https://github.com/rust-lang/stdarch/issues/791
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
        transmute::<i64x2, _>(ret)
    }
}

/// Broadcasts the low packed 64-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastq_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
        transmute::<i64x4, _>(ret)
    }
}

/// Broadcasts the low double-precision (64-bit) floating-point element
/// from `a` to all elements of the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastsd_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2]) }
}

/// Broadcasts the low double-precision (64-bit) floating-point element
/// from `a` to all elements of the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastsd_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4]) }
}

/// Broadcasts 128 bits of integer data from `a` to all 128-bit lanes in
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastsi128_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}

// N.B., `broadcastsi128_si256` is often compiled to `vinsertf128` or
// `vbroadcastf128`.
/// Broadcasts 128 bits of integer data from `a` to all 128-bit lanes in
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastsi128_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}

/// Broadcasts the low single-precision (32-bit) floating-point element
/// from `a` to all elements of the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastss_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastss_ps(a: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4]) }
}

/// Broadcasts the low single-precision (32-bit) floating-point element
/// from `a` to all elements of the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastss_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8]) }
}

/// Broadcasts the low packed 16-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastw_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 8]);
        transmute::<i16x8, _>(ret)
    }
}

/// Broadcasts the low packed 16-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastw_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 16]);
        transmute::<i16x16, _>(ret)
    }
}

/// Compares packed 64-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
}

/// Compares packed 32-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
}

/// Compares packed 16-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
}

/// Compares packed 8-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
}

/// Compares packed 64-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
}

/// Compares packed 32-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32)
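///
/// A minimal usage sketch (illustrative only; it assumes an x86_64 target and
/// that AVX2 has been detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_setr_epi32(5, 1, 5, 1, 5, 1, 5, 1);
///             let b = _mm256_set1_epi32(3);
///             // Each lane is all ones (-1) where a > b, otherwise all zeros.
///             let lanes: [i32; 8] = core::mem::transmute(_mm256_cmpgt_epi32(a, b));
///             assert_eq!(lanes, [-1, 0, -1, 0, -1, 0, -1, 0]);
///         }
///     }
/// }
/// ```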
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
}

/// Compares packed 16-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
}

/// Compares packed 8-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
}

/// Sign-extend 16-bit integers to 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_i16x8())) }
}

/// Sign-extend 16-bit integers to 64-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i16x8();
        let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}

/// Sign-extend 32-bit integers to 64-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_i32x4())) }
}

/// Sign-extend 8-bit integers to 16-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_i8x16())) }
}

/// Sign-extend 8-bit integers to 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi32)
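///
/// A minimal usage sketch (illustrative only; it assumes an x86_64 target and
/// that AVX2 has been detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm_setr_epi8(-1, 2, -3, 4, -5, 6, -7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
///             // Only the low eight bytes are sign-extended to 32 bits.
///             let lanes: [i32; 8] = core::mem::transmute(_mm256_cvtepi8_epi32(a));
///             assert_eq!(lanes, [-1, 2, -3, 4, -5, 6, -7, 8]);
///         }
///     }
/// }
/// ```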
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}

/// Sign-extend 8-bit integers to 64-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}

/// Zero-extend unsigned 16-bit integers in `a` to 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu16_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_u16x8())) }
}

/// Zero-extend the lower four unsigned 16-bit integers in `a` to 64-bit
/// integers. The upper four elements of `a` are unused.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu16_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u16x8();
        let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}

/// Zero-extend unsigned 32-bit integers in `a` to 64-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_u32x4())) }
}

/// Zero-extend unsigned 8-bit integers in `a` to 16-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_u8x16())) }
}

/// Zero-extend the lower eight unsigned 8-bit integers in `a` to 32-bit
/// integers. The upper eight elements of `a` are unused.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi32)
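///
/// A minimal usage sketch (illustrative only; it assumes an x86_64 target and
/// that AVX2 has been detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm_setr_epi8(-1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
///             // Zero-extension treats each byte as unsigned, so -1 becomes 255.
///             let lanes: [i32; 8] = core::mem::transmute(_mm256_cvtepu8_epi32(a));
///             assert_eq!(lanes, [255, 2, 3, 4, 5, 6, 7, 8]);
///         }
///     }
/// }
/// ```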
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}

/// Zero-extend the lower four unsigned 8-bit integers in `a` to 64-bit
/// integers. The upper twelve elements of `a` are unused.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}

/// Extracts 128 bits (of integer data) from `a` selected with `IMM1`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti128_si256)
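///
/// A minimal usage sketch (illustrative only; it assumes an x86_64 target and
/// that AVX2 has been detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_setr_epi64x(1, 2, 3, 4);
///             // IMM1 = 1 selects the upper 128 bits.
///             let hi = _mm256_extracti128_si256::<1>(a);
///             let lanes: [i64; 2] = core::mem::transmute(hi);
///             assert_eq!(lanes, [3, 4]);
///         }
///     }
/// }
/// ```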
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        let b = i64x4::ZERO;
        let dst: i64x2 = simd_shuffle!(a, b, [[0, 1], [2, 3]][IMM1 as usize]);
        transmute(dst)
    }
}

/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    unsafe {
        let even: i16x16 = simd_shuffle!(
            a,
            b,
            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
        );
        let odd: i16x16 = simd_shuffle!(
            a,
            b,
            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
        );
        simd_add(even, odd).as_m256i()
    }
}

/// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_epi32)
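///
/// A minimal usage sketch (illustrative only; it assumes an x86_64 target and
/// that AVX2 has been detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
///             let b = _mm256_setr_epi32(10, 20, 30, 40, 50, 60, 70, 80);
///             // Within each 128-bit half: pair sums of `a`, then pair sums of `b`.
///             let lanes: [i32; 8] = core::mem::transmute(_mm256_hadd_epi32(a, b));
///             assert_eq!(lanes, [3, 7, 30, 70, 11, 15, 110, 150]);
///         }
///     }
/// }
/// ```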
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i32x8();
    let b = b.as_i32x8();
    unsafe {
        let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
        let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
        simd_add(even, odd).as_m256i()
    }
}

/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadds_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    unsafe {
        let even: i16x16 = simd_shuffle!(
            a,
            b,
            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
        );
        let odd: i16x16 = simd_shuffle!(
            a,
            b,
            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
        );
        simd_saturating_add(even, odd).as_m256i()
    }
}

/// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    unsafe {
        let even: i16x16 = simd_shuffle!(
            a,
            b,
            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
        );
        let odd: i16x16 = simd_shuffle!(
            a,
            b,
            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
        );
        simd_sub(even, odd).as_m256i()
    }
}

/// Horizontally subtract adjacent pairs of 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i32x8();
    let b = b.as_i32x8();
    unsafe {
        let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
        let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
        simd_sub(even, odd).as_m256i()
    }
}

/// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsubs_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    unsafe {
        let even: i16x16 = simd_shuffle!(
            a,
            b,
            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
        );
        let odd: i16x16 = simd_shuffle!(
            a,
            b,
            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
        );
        simd_saturating_sub(even, odd).as_m256i()
    }
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_epi32)
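///
/// A minimal usage sketch (illustrative only; it assumes an x86_64 target,
/// AVX2 detected at runtime, and that the caller upholds the pointer-validity
/// contract for every gathered offset):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let data: [i32; 8] = [10, 11, 12, 13, 14, 15, 16, 17];
///             let offsets = _mm_setr_epi32(0, 2, 4, 6);
///             // SCALE = 4 because the i32 elements are 4 bytes apart.
///             let r = _mm_i32gather_epi32::<4>(data.as_ptr(), offsets);
///             let lanes: [i32; 4] = core::mem::transmute(r);
///             assert_eq!(lanes, [10, 12, 14, 16]);
///         }
///     }
/// }
/// ```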
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi32(-1).as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. For positions whose corresponding
/// element of `mask` does not have its highest bit set, the value from `src`
/// is used instead.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_epi32)
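///
/// A minimal usage sketch (illustrative only; it assumes an x86_64 target,
/// AVX2 detected at runtime, and a valid slice for every active offset):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let data: [i32; 4] = [10, 11, 12, 13];
///             let src = _mm_set1_epi32(-1);
///             let offsets = _mm_setr_epi32(0, 1, 2, 3);
///             // Only lanes whose mask element has its top bit set are gathered;
///             // the others keep the corresponding value from `src`.
///             let mask = _mm_setr_epi32(i32::MIN, 0, i32::MIN, 0);
///             let r = _mm_mask_i32gather_epi32::<4>(src, data.as_ptr(), offsets, mask);
///             let lanes: [i32; 4] = core::mem::transmute(r);
///             assert_eq!(lanes, [10, -1, 12, -1]);
///         }
///     }
/// }
/// ```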
1110#[inline]
1111#[target_feature(enable = "avx2")]
1112#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
1113#[rustc_legacy_const_generics(4)]
1114#[stable(feature = "simd_x86", since = "1.27.0")]
1115pub unsafe fn _mm_mask_i32gather_epi32<const SCALE: i32>(
1116 src: __m128i,
1117 slice: *const i32,
1118 offsets: __m128i,
1119 mask: __m128i,
1120) -> __m128i {
1121 static_assert_imm8_scale!(SCALE);
1122 let src: Simd = src.as_i32x4();
1123 let mask: Simd = mask.as_i32x4();
1124 let offsets: Simd = offsets.as_i32x4();
1125 let slice: *const i8 = slice as *const i8;
1126 let r: Simd = pgatherdd(src, slice, offsets, mask, SCALE as i8);
1127 transmute(src:r)
1128}
1129
1130/// Returns values from `slice` at offsets determined by `offsets * scale`,
1131/// where
1132/// `scale` should be 1, 2, 4 or 8.
1133///
1134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_epi32)
1135#[inline]
1136#[target_feature(enable = "avx2")]
1137#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
1138#[rustc_legacy_const_generics(2)]
1139#[stable(feature = "simd_x86", since = "1.27.0")]
1140pub unsafe fn _mm256_i32gather_epi32<const SCALE: i32>(
1141 slice: *const i32,
1142 offsets: __m256i,
1143) -> __m256i {
1144 static_assert_imm8_scale!(SCALE);
1145 let zero: Simd = i32x8::ZERO;
1146 let neg_one: Simd = _mm256_set1_epi32(-1).as_i32x8();
1147 let offsets: Simd = offsets.as_i32x8();
1148 let slice: *const i8 = slice as *const i8;
1149 let r: Simd = vpgatherdd(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1150 transmute(src:r)
1151}
1152
1153/// Returns values from `slice` at offsets determined by `offsets * scale`,
1154/// where
1155/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1156/// that position instead.
1157///
1158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_epi32)
1159#[inline]
1160#[target_feature(enable = "avx2")]
1161#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
1162#[rustc_legacy_const_generics(4)]
1163#[stable(feature = "simd_x86", since = "1.27.0")]
1164pub unsafe fn _mm256_mask_i32gather_epi32<const SCALE: i32>(
1165 src: __m256i,
1166 slice: *const i32,
1167 offsets: __m256i,
1168 mask: __m256i,
1169) -> __m256i {
1170 static_assert_imm8_scale!(SCALE);
1171 let src: Simd = src.as_i32x8();
1172 let mask: Simd = mask.as_i32x8();
1173 let offsets: Simd = offsets.as_i32x8();
1174 let slice: *const i8 = slice as *const i8;
1175 let r: Simd = vpgatherdd(src, slice, offsets, mask, SCALE as i8);
1176 transmute(src:r)
1177}
1178
1179/// Returns values from `slice` at offsets determined by `offsets * scale`,
1180/// where
1181/// `scale` should be 1, 2, 4 or 8.
1182///
1183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_ps)
1184#[inline]
1185#[target_feature(enable = "avx2")]
1186#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1187#[rustc_legacy_const_generics(2)]
1188#[stable(feature = "simd_x86", since = "1.27.0")]
1189pub unsafe fn _mm_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
1190 static_assert_imm8_scale!(SCALE);
1191 let zero: __m128 = _mm_setzero_ps();
1192 let neg_one: __m128 = _mm_set1_ps(-1.0);
1193 let offsets: Simd = offsets.as_i32x4();
1194 let slice: *const i8 = slice as *const i8;
1195 pgatherdps(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1196}
1197
1198/// Returns values from `slice` at offsets determined by `offsets * scale`,
1199/// where
1200/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1201/// that position instead.
1202///
1203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_ps)
1204#[inline]
1205#[target_feature(enable = "avx2")]
1206#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1207#[rustc_legacy_const_generics(4)]
1208#[stable(feature = "simd_x86", since = "1.27.0")]
1209pub unsafe fn _mm_mask_i32gather_ps<const SCALE: i32>(
1210 src: __m128,
1211 slice: *const f32,
1212 offsets: __m128i,
1213 mask: __m128,
1214) -> __m128 {
1215 static_assert_imm8_scale!(SCALE);
1216 let offsets: Simd = offsets.as_i32x4();
1217 let slice: *const i8 = slice as *const i8;
1218 pgatherdps(src, slice, offsets, mask, SCALE as i8)
1219}
1220
1221/// Returns values from `slice` at offsets determined by `offsets * scale`,
1222/// where
1223/// `scale` should be 1, 2, 4 or 8.
1224///
1225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_ps)
1226#[inline]
1227#[target_feature(enable = "avx2")]
1228#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1229#[rustc_legacy_const_generics(2)]
1230#[stable(feature = "simd_x86", since = "1.27.0")]
1231pub unsafe fn _mm256_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m256 {
1232 static_assert_imm8_scale!(SCALE);
1233 let zero: __m256 = _mm256_setzero_ps();
1234 let neg_one: __m256 = _mm256_set1_ps(-1.0);
1235 let offsets: Simd = offsets.as_i32x8();
1236 let slice: *const i8 = slice as *const i8;
1237 vpgatherdps(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1238}
1239
1240/// Returns values from `slice` at offsets determined by `offsets * scale`,
1241/// where
1242/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1243/// that position instead.
1244///
1245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_ps)
1246#[inline]
1247#[target_feature(enable = "avx2")]
1248#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1249#[rustc_legacy_const_generics(4)]
1250#[stable(feature = "simd_x86", since = "1.27.0")]
1251pub unsafe fn _mm256_mask_i32gather_ps<const SCALE: i32>(
1252 src: __m256,
1253 slice: *const f32,
1254 offsets: __m256i,
1255 mask: __m256,
1256) -> __m256 {
1257 static_assert_imm8_scale!(SCALE);
1258 let offsets: Simd = offsets.as_i32x8();
1259 let slice: *const i8 = slice as *const i8;
1260 vpgatherdps(src, slice, offsets, mask, SCALE as i8)
1261}
1262
1263/// Returns values from `slice` at offsets determined by `offsets * scale`,
1264/// where
1265/// `scale` should be 1, 2, 4 or 8.
1266///
1267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_epi64)
1268#[inline]
1269#[target_feature(enable = "avx2")]
1270#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
1271#[rustc_legacy_const_generics(2)]
1272#[stable(feature = "simd_x86", since = "1.27.0")]
1273pub unsafe fn _mm_i32gather_epi64<const SCALE: i32>(
1274 slice: *const i64,
1275 offsets: __m128i,
1276) -> __m128i {
1277 static_assert_imm8_scale!(SCALE);
1278 let zero: Simd = i64x2::ZERO;
1279 let neg_one: Simd = _mm_set1_epi64x(-1).as_i64x2();
1280 let offsets: Simd = offsets.as_i32x4();
1281 let slice: *const i8 = slice as *const i8;
1282 let r: Simd = pgatherdq(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1283 transmute(src:r)
1284}
1285
1286/// Returns values from `slice` at offsets determined by `offsets * scale`,
1287/// where
1288/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1289/// that position instead.
1290///
1291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_epi64)
1292#[inline]
1293#[target_feature(enable = "avx2")]
1294#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
1295#[rustc_legacy_const_generics(4)]
1296#[stable(feature = "simd_x86", since = "1.27.0")]
1297pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>(
1298 src: __m128i,
1299 slice: *const i64,
1300 offsets: __m128i,
1301 mask: __m128i,
1302) -> __m128i {
1303 static_assert_imm8_scale!(SCALE);
1304 let src: Simd = src.as_i64x2();
1305 let mask: Simd = mask.as_i64x2();
1306 let offsets: Simd = offsets.as_i32x4();
1307 let slice: *const i8 = slice as *const i8;
1308 let r: Simd = pgatherdq(src, slice, offsets, mask, SCALE as i8);
1309 transmute(src:r)
1310}
1311
1312/// Returns values from `slice` at offsets determined by `offsets * scale`,
1313/// where
1314/// `scale` should be 1, 2, 4 or 8.
1315///
1316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_epi64)
1317#[inline]
1318#[target_feature(enable = "avx2")]
1319#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
1320#[rustc_legacy_const_generics(2)]
1321#[stable(feature = "simd_x86", since = "1.27.0")]
1322pub unsafe fn _mm256_i32gather_epi64<const SCALE: i32>(
1323 slice: *const i64,
1324 offsets: __m128i,
1325) -> __m256i {
1326 static_assert_imm8_scale!(SCALE);
1327 let zero: Simd = i64x4::ZERO;
1328 let neg_one: Simd = _mm256_set1_epi64x(-1).as_i64x4();
1329 let offsets: Simd = offsets.as_i32x4();
1330 let slice: *const i8 = slice as *const i8;
1331 let r: Simd = vpgatherdq(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1332 transmute(src:r)
1333}
1334
1335/// Returns values from `slice` at offsets determined by `offsets * scale`,
1336/// where
1337/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1338/// that position instead.
1339///
1340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_epi64)
1341#[inline]
1342#[target_feature(enable = "avx2")]
1343#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
1344#[rustc_legacy_const_generics(4)]
1345#[stable(feature = "simd_x86", since = "1.27.0")]
1346pub unsafe fn _mm256_mask_i32gather_epi64<const SCALE: i32>(
1347 src: __m256i,
1348 slice: *const i64,
1349 offsets: __m128i,
1350 mask: __m256i,
1351) -> __m256i {
1352 static_assert_imm8_scale!(SCALE);
1353 let src: Simd = src.as_i64x4();
1354 let mask: Simd = mask.as_i64x4();
1355 let offsets: Simd = offsets.as_i32x4();
1356 let slice: *const i8 = slice as *const i8;
1357 let r: Simd = vpgatherdq(src, slice, offsets, mask, SCALE as i8);
1358 transmute(src:r)
1359}
1360
1361/// Returns values from `slice` at offsets determined by `offsets * scale`,
1362/// where
1363/// `scale` should be 1, 2, 4 or 8.
1364///
1365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_pd)
1366#[inline]
1367#[target_feature(enable = "avx2")]
1368#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
1369#[rustc_legacy_const_generics(2)]
1370#[stable(feature = "simd_x86", since = "1.27.0")]
1371pub unsafe fn _mm_i32gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
1372 static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    let neg_one = _mm_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
1378}
1379
1380/// Returns values from `slice` at offsets determined by `offsets * scale`,
1381/// where
/// `scale` should be 1, 2, 4 or 8. If the highest bit of an element of `mask`
/// is not set, the value from `src` is used in that position instead.
1384///
1385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_pd)
1386#[inline]
1387#[target_feature(enable = "avx2")]
1388#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
1389#[rustc_legacy_const_generics(4)]
1390#[stable(feature = "simd_x86", since = "1.27.0")]
1391pub unsafe fn _mm_mask_i32gather_pd<const SCALE: i32>(
1392 src: __m128d,
1393 slice: *const f64,
1394 offsets: __m128i,
1395 mask: __m128d,
1396) -> __m128d {
1397 static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdpd(src, slice, offsets, mask, SCALE as i8)
1401}
1402
1403/// Returns values from `slice` at offsets determined by `offsets * scale`,
1404/// where
1405/// `scale` should be 1, 2, 4 or 8.
1406///
1407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_pd)
1408#[inline]
1409#[target_feature(enable = "avx2")]
1410#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
1411#[rustc_legacy_const_generics(2)]
1412#[stable(feature = "simd_x86", since = "1.27.0")]
1413pub unsafe fn _mm256_i32gather_pd<const SCALE: i32>(
1414 slice: *const f64,
1415 offsets: __m128i,
1416) -> __m256d {
1417 static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    let neg_one = _mm256_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
1423}
1424
1425/// Returns values from `slice` at offsets determined by `offsets * scale`,
1426/// where
/// `scale` should be 1, 2, 4 or 8. If the highest bit of an element of `mask`
/// is not set, the value from `src` is used in that position instead.
1429///
1430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_pd)
1431#[inline]
1432#[target_feature(enable = "avx2")]
1433#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
1434#[rustc_legacy_const_generics(4)]
1435#[stable(feature = "simd_x86", since = "1.27.0")]
1436pub unsafe fn _mm256_mask_i32gather_pd<const SCALE: i32>(
1437 src: __m256d,
1438 slice: *const f64,
1439 offsets: __m128i,
1440 mask: __m256d,
1441) -> __m256d {
1442 static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(src, slice, offsets, mask, SCALE as i8)
1446}
1447
1448/// Returns values from `slice` at offsets determined by `offsets * scale`,
1449/// where
1450/// `scale` should be 1, 2, 4 or 8.
1451///
1452/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_epi32)
1453#[inline]
1454#[target_feature(enable = "avx2")]
1455#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
1456#[rustc_legacy_const_generics(2)]
1457#[stable(feature = "simd_x86", since = "1.27.0")]
1458pub unsafe fn _mm_i64gather_epi32<const SCALE: i32>(
1459 slice: *const i32,
1460 offsets: __m128i,
1461) -> __m128i {
1462 static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
1469}
1470
1471/// Returns values from `slice` at offsets determined by `offsets * scale`,
1472/// where
/// `scale` should be 1, 2, 4 or 8. If the highest bit of an element of `mask`
/// is not set, the value from `src` is used in that position instead.
1475///
1476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_epi32)
1477#[inline]
1478#[target_feature(enable = "avx2")]
1479#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
1480#[rustc_legacy_const_generics(4)]
1481#[stable(feature = "simd_x86", since = "1.27.0")]
1482pub unsafe fn _mm_mask_i64gather_epi32<const SCALE: i32>(
1483 src: __m128i,
1484 slice: *const i32,
1485 offsets: __m128i,
1486 mask: __m128i,
1487) -> __m128i {
1488 static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
1495}
1496
1497/// Returns values from `slice` at offsets determined by `offsets * scale`,
1498/// where
1499/// `scale` should be 1, 2, 4 or 8.
1500///
1501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_epi32)
1502#[inline]
1503#[target_feature(enable = "avx2")]
1504#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
1505#[rustc_legacy_const_generics(2)]
1506#[stable(feature = "simd_x86", since = "1.27.0")]
1507pub unsafe fn _mm256_i64gather_epi32<const SCALE: i32>(
1508 slice: *const i32,
1509 offsets: __m256i,
1510) -> __m128i {
1511 static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
1518}
1519
1520/// Returns values from `slice` at offsets determined by `offsets * scale`,
1521/// where
/// `scale` should be 1, 2, 4 or 8. If the highest bit of an element of `mask`
/// is not set, the value from `src` is used in that position instead.
1524///
1525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_epi32)
1526#[inline]
1527#[target_feature(enable = "avx2")]
1528#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
1529#[rustc_legacy_const_generics(4)]
1530#[stable(feature = "simd_x86", since = "1.27.0")]
1531pub unsafe fn _mm256_mask_i64gather_epi32<const SCALE: i32>(
1532 src: __m128i,
1533 slice: *const i32,
1534 offsets: __m256i,
1535 mask: __m128i,
1536) -> __m128i {
1537 static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
1544}
1545
1546/// Returns values from `slice` at offsets determined by `offsets * scale`,
1547/// where
1548/// `scale` should be 1, 2, 4 or 8.
1549///
1550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_ps)
1551#[inline]
1552#[target_feature(enable = "avx2")]
1553#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
1554#[rustc_legacy_const_generics(2)]
1555#[stable(feature = "simd_x86", since = "1.27.0")]
1556pub unsafe fn _mm_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
1557 static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
1563}
1564
1565/// Returns values from `slice` at offsets determined by `offsets * scale`,
1566/// where
/// `scale` should be 1, 2, 4 or 8. If the highest bit of an element of `mask`
/// is not set, the value from `src` is used in that position instead.
1569///
1570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_ps)
1571#[inline]
1572#[target_feature(enable = "avx2")]
1573#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
1574#[rustc_legacy_const_generics(4)]
1575#[stable(feature = "simd_x86", since = "1.27.0")]
1576pub unsafe fn _mm_mask_i64gather_ps<const SCALE: i32>(
1577 src: __m128,
1578 slice: *const f32,
1579 offsets: __m128i,
1580 mask: __m128,
1581) -> __m128 {
1582 static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(src, slice, offsets, mask, SCALE as i8)
1586}
1587
1588/// Returns values from `slice` at offsets determined by `offsets * scale`,
1589/// where
1590/// `scale` should be 1, 2, 4 or 8.
1591///
1592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_ps)
1593#[inline]
1594#[target_feature(enable = "avx2")]
1595#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
1596#[rustc_legacy_const_generics(2)]
1597#[stable(feature = "simd_x86", since = "1.27.0")]
1598pub unsafe fn _mm256_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m128 {
1599 static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
1605}
1606
1607/// Returns values from `slice` at offsets determined by `offsets * scale`,
1608/// where
/// `scale` should be 1, 2, 4 or 8. If the highest bit of an element of `mask`
/// is not set, the value from `src` is used in that position instead.
1611///
1612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_ps)
1613#[inline]
1614#[target_feature(enable = "avx2")]
1615#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
1616#[rustc_legacy_const_generics(4)]
1617#[stable(feature = "simd_x86", since = "1.27.0")]
1618pub unsafe fn _mm256_mask_i64gather_ps<const SCALE: i32>(
1619 src: __m128,
1620 slice: *const f32,
1621 offsets: __m256i,
1622 mask: __m128,
1623) -> __m128 {
1624 static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(src, slice, offsets, mask, SCALE as i8)
1628}
1629
1630/// Returns values from `slice` at offsets determined by `offsets * scale`,
1631/// where
1632/// `scale` should be 1, 2, 4 or 8.
1633///
1634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_epi64)
1635#[inline]
1636#[target_feature(enable = "avx2")]
1637#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
1638#[rustc_legacy_const_generics(2)]
1639#[stable(feature = "simd_x86", since = "1.27.0")]
1640pub unsafe fn _mm_i64gather_epi64<const SCALE: i32>(
1641 slice: *const i64,
1642 offsets: __m128i,
1643) -> __m128i {
1644 static_assert_imm8_scale!(SCALE);
    let zero = i64x2::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    let r = pgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
1651}
1652
1653/// Returns values from `slice` at offsets determined by `offsets * scale`,
1654/// where
/// `scale` should be 1, 2, 4 or 8. If the highest bit of an element of `mask`
/// is not set, the value from `src` is used in that position instead.
1657///
1658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_epi64)
1659#[inline]
1660#[target_feature(enable = "avx2")]
1661#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
1662#[rustc_legacy_const_generics(4)]
1663#[stable(feature = "simd_x86", since = "1.27.0")]
1664pub unsafe fn _mm_mask_i64gather_epi64<const SCALE: i32>(
1665 src: __m128i,
1666 slice: *const i64,
1667 offsets: __m128i,
1668 mask: __m128i,
1669) -> __m128i {
1670 static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
1677}
1678
1679/// Returns values from `slice` at offsets determined by `offsets * scale`,
1680/// where
1681/// `scale` should be 1, 2, 4 or 8.
1682///
1683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_epi64)
1684#[inline]
1685#[target_feature(enable = "avx2")]
1686#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
1687#[rustc_legacy_const_generics(2)]
1688#[stable(feature = "simd_x86", since = "1.27.0")]
1689pub unsafe fn _mm256_i64gather_epi64<const SCALE: i32>(
1690 slice: *const i64,
1691 offsets: __m256i,
1692) -> __m256i {
1693 static_assert_imm8_scale!(SCALE);
    let zero = i64x4::ZERO;
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
1700}
1701
1702/// Returns values from `slice` at offsets determined by `offsets * scale`,
1703/// where
/// `scale` should be 1, 2, 4 or 8. If the highest bit of an element of `mask`
/// is not set, the value from `src` is used in that position instead.
1706///
1707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_epi64)
1708#[inline]
1709#[target_feature(enable = "avx2")]
1710#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
1711#[rustc_legacy_const_generics(4)]
1712#[stable(feature = "simd_x86", since = "1.27.0")]
1713pub unsafe fn _mm256_mask_i64gather_epi64<const SCALE: i32>(
1714 src: __m256i,
1715 slice: *const i64,
1716 offsets: __m256i,
1717 mask: __m256i,
1718) -> __m256i {
1719 static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
1726}
1727
1728/// Returns values from `slice` at offsets determined by `offsets * scale`,
1729/// where
1730/// `scale` should be 1, 2, 4 or 8.
1731///
1732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_pd)
1733#[inline]
1734#[target_feature(enable = "avx2")]
1735#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
1736#[rustc_legacy_const_generics(2)]
1737#[stable(feature = "simd_x86", since = "1.27.0")]
1738pub unsafe fn _mm_i64gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
1739 static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    let neg_one = _mm_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
1745}
1746
1747/// Returns values from `slice` at offsets determined by `offsets * scale`,
1748/// where
/// `scale` should be 1, 2, 4 or 8. If the highest bit of an element of `mask`
/// is not set, the value from `src` is used in that position instead.
1751///
1752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_pd)
1753#[inline]
1754#[target_feature(enable = "avx2")]
1755#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
1756#[rustc_legacy_const_generics(4)]
1757#[stable(feature = "simd_x86", since = "1.27.0")]
1758pub unsafe fn _mm_mask_i64gather_pd<const SCALE: i32>(
1759 src: __m128d,
1760 slice: *const f64,
1761 offsets: __m128i,
1762 mask: __m128d,
1763) -> __m128d {
1764 static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(src, slice, offsets, mask, SCALE as i8)
1768}
1769
1770/// Returns values from `slice` at offsets determined by `offsets * scale`,
1771/// where
1772/// `scale` should be 1, 2, 4 or 8.
1773///
1774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_pd)
1775#[inline]
1776#[target_feature(enable = "avx2")]
1777#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
1778#[rustc_legacy_const_generics(2)]
1779#[stable(feature = "simd_x86", since = "1.27.0")]
1780pub unsafe fn _mm256_i64gather_pd<const SCALE: i32>(
1781 slice: *const f64,
1782 offsets: __m256i,
1783) -> __m256d {
1784 static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    let neg_one = _mm256_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
1790}
1791
1792/// Returns values from `slice` at offsets determined by `offsets * scale`,
1793/// where
/// `scale` should be 1, 2, 4 or 8. If the highest bit of an element of `mask`
/// is not set, the value from `src` is used in that position instead.
1796///
1797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_pd)
1798#[inline]
1799#[target_feature(enable = "avx2")]
1800#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
1801#[rustc_legacy_const_generics(4)]
1802#[stable(feature = "simd_x86", since = "1.27.0")]
1803pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>(
1804 src: __m256d,
1805 slice: *const f64,
1806 offsets: __m256i,
1807 mask: __m256d,
1808) -> __m256d {
1809 static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(src, slice, offsets, mask, SCALE as i8)
1813}
1814
/// Copies `a` to `dst`, then inserts 128 bits (of integer data) from `b` at the
1816/// location specified by `IMM1`.
1817///
1818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti128_si256)
1819#[inline]
1820#[target_feature(enable = "avx2")]
1821#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
1822#[rustc_legacy_const_generics(2)]
1823#[stable(feature = "simd_x86", since = "1.27.0")]
1824#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1825pub const fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
1826 static_assert_uimm_bits!(IMM1, 1);
1827 unsafe {
        let a = a.as_i64x4();
        let b = _mm256_castsi128_si256(b).as_i64x4();
        let dst: i64x4 = simd_shuffle!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]);
        transmute(dst)
1832 }
1833}
1834
1835/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally adds adjacent pairs
1837/// of intermediate 32-bit integers.
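///
/// A minimal sketch, assuming `avx2` is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(2);
/// let b = _mm256_set1_epi16(3);
/// // Each 32-bit result is 2 * 3 + 2 * 3 = 12.
/// let r = _mm256_madd_epi16(a, b);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi32(r, _mm256_set1_epi32(12))), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```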
1838///
1839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd_epi16)
1840#[inline]
1841#[target_feature(enable = "avx2")]
1842#[cfg_attr(test, assert_instr(vpmaddwd))]
1843#[stable(feature = "simd_x86", since = "1.27.0")]
1844pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
1845 // It's a trick used in the Adler-32 algorithm to perform a widening addition.
1846 //
1847 // ```rust
1848 // #[target_feature(enable = "avx2")]
1849 // unsafe fn widening_add(mad: __m256i) -> __m256i {
1850 // _mm256_madd_epi16(mad, _mm256_set1_epi16(1))
1851 // }
1852 // ```
1853 //
1854 // If we implement this using generic vector intrinsics, the optimizer
1855 // will eliminate this pattern, and `vpmaddwd` will no longer be emitted.
1856 // For this reason, we use x86 intrinsics.
    unsafe { transmute(pmaddwd(a.as_i16x16(), b.as_i16x16())) }
1858}
1859
1860/// Vertically multiplies each unsigned 8-bit integer from `a` with the
1861/// corresponding signed 8-bit integer from `b`, producing intermediate
/// signed 16-bit integers. Horizontally adds adjacent pairs of intermediate
/// signed 16-bit integers.
1864///
1865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maddubs_epi16)
1866#[inline]
1867#[target_feature(enable = "avx2")]
1868#[cfg_attr(test, assert_instr(vpmaddubsw))]
1869#[stable(feature = "simd_x86", since = "1.27.0")]
1870pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_i8x32())) }
1872}
1873
1874/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
1875/// (elements are zeroed out when the highest bit is not set in the
1876/// corresponding element).
1877///
1878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_epi32)
1879#[inline]
1880#[target_feature(enable = "avx2")]
1881#[cfg_attr(test, assert_instr(vpmaskmovd))]
1882#[stable(feature = "simd_x86", since = "1.27.0")]
1883#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1884pub const unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
    let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
1886 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x4::ZERO).as_m128i()
1887}
1888
1889/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
1890/// (elements are zeroed out when the highest bit is not set in the
1891/// corresponding element).
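///
/// A minimal sketch, assuming `avx2` is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let data: [i32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
/// // Load only the even-indexed lanes; the remaining lanes are zeroed.
/// let mask = _mm256_setr_epi32(-1, 0, -1, 0, -1, 0, -1, 0);
/// let r = unsafe { _mm256_maskload_epi32(data.as_ptr(), mask) };
/// let expected = _mm256_setr_epi32(1, 0, 3, 0, 5, 0, 7, 0);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi32(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```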
1892///
1893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_epi32)
1894#[inline]
1895#[target_feature(enable = "avx2")]
1896#[cfg_attr(test, assert_instr(vpmaskmovd))]
1897#[stable(feature = "simd_x86", since = "1.27.0")]
1898#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1899pub const unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
    let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
1901 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x8::ZERO).as_m256i()
1902}
1903
1904/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask`
1905/// (elements are zeroed out when the highest bit is not set in the
1906/// corresponding element).
1907///
1908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_epi64)
1909#[inline]
1910#[target_feature(enable = "avx2")]
1911#[cfg_attr(test, assert_instr(vpmaskmovq))]
1912#[stable(feature = "simd_x86", since = "1.27.0")]
1913#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1914pub const unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
    let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
1916 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x2::ZERO).as_m128i()
1917}
1918
1919/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask`
1920/// (elements are zeroed out when the highest bit is not set in the
1921/// corresponding element).
1922///
1923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_epi64)
1924#[inline]
1925#[target_feature(enable = "avx2")]
1926#[cfg_attr(test, assert_instr(vpmaskmovq))]
1927#[stable(feature = "simd_x86", since = "1.27.0")]
1928#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1929pub const unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
    let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
1931 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x4::ZERO).as_m256i()
1932}
1933
1934/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr`
1935/// using `mask` (elements are not stored when the highest bit is not set
1936/// in the corresponding element).
1937///
1938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_epi32)
1939#[inline]
1940#[target_feature(enable = "avx2")]
1941#[cfg_attr(test, assert_instr(vpmaskmovd))]
1942#[stable(feature = "simd_x86", since = "1.27.0")]
1943#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1944pub const unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
    let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
1946 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4())
1947}
1948
1949/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr`
1950/// using `mask` (elements are not stored when the highest bit is not set
1951/// in the corresponding element).
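///
/// A minimal sketch, assuming `avx2` is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let mut out = [0i32; 8];
/// let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
/// // Only lanes whose mask element has its highest bit set are written.
/// let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, 0, 0, -1);
/// unsafe { _mm256_maskstore_epi32(out.as_mut_ptr(), mask, a) };
/// assert_eq!(out, [1, 0, 0, 4, 0, 0, 0, 8]);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```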
1952///
1953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_epi32)
1954#[inline]
1955#[target_feature(enable = "avx2")]
1956#[cfg_attr(test, assert_instr(vpmaskmovd))]
1957#[stable(feature = "simd_x86", since = "1.27.0")]
1958#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1959pub const unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
    let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
1961 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8())
1962}
1963
1964/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr`
1965/// using `mask` (elements are not stored when the highest bit is not set
1966/// in the corresponding element).
1967///
1968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_epi64)
1969#[inline]
1970#[target_feature(enable = "avx2")]
1971#[cfg_attr(test, assert_instr(vpmaskmovq))]
1972#[stable(feature = "simd_x86", since = "1.27.0")]
1973#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1974pub const unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
    let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
1976 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2())
1977}
1978
1979/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr`
1980/// using `mask` (elements are not stored when the highest bit is not set
1981/// in the corresponding element).
1982///
1983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_epi64)
1984#[inline]
1985#[target_feature(enable = "avx2")]
1986#[cfg_attr(test, assert_instr(vpmaskmovq))]
1987#[stable(feature = "simd_x86", since = "1.27.0")]
1988#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1989pub const unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
    let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
1991 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4())
1992}
1993
1994/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
1995/// maximum values.
1996///
1997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi16)
1998#[inline]
1999#[target_feature(enable = "avx2")]
2000#[cfg_attr(test, assert_instr(vpmaxsw))]
2001#[stable(feature = "simd_x86", since = "1.27.0")]
2002#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2003pub const fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
2004 unsafe { simd_imax(a.as_i16x16(), b.as_i16x16()).as_m256i() }
2005}
2006
2007/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
2008/// maximum values.
2009///
2010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi32)
2011#[inline]
2012#[target_feature(enable = "avx2")]
2013#[cfg_attr(test, assert_instr(vpmaxsd))]
2014#[stable(feature = "simd_x86", since = "1.27.0")]
2015#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2016pub const fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
2017 unsafe { simd_imax(a.as_i32x8(), b.as_i32x8()).as_m256i() }
2018}
2019
2020/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
2021/// maximum values.
2022///
2023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi8)
2024#[inline]
2025#[target_feature(enable = "avx2")]
2026#[cfg_attr(test, assert_instr(vpmaxsb))]
2027#[stable(feature = "simd_x86", since = "1.27.0")]
2028#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2029pub const fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
2030 unsafe { simd_imax(a.as_i8x32(), b.as_i8x32()).as_m256i() }
2031}
2032
2033/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
2034/// the packed maximum values.
2035///
2036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu16)
2037#[inline]
2038#[target_feature(enable = "avx2")]
2039#[cfg_attr(test, assert_instr(vpmaxuw))]
2040#[stable(feature = "simd_x86", since = "1.27.0")]
2041#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2042pub const fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
2043 unsafe { simd_imax(a.as_u16x16(), b.as_u16x16()).as_m256i() }
2044}
2045
2046/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
2047/// the packed maximum values.
2048///
2049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu32)
2050#[inline]
2051#[target_feature(enable = "avx2")]
2052#[cfg_attr(test, assert_instr(vpmaxud))]
2053#[stable(feature = "simd_x86", since = "1.27.0")]
2054#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2055pub const fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
2056 unsafe { simd_imax(a.as_u32x8(), b.as_u32x8()).as_m256i() }
2057}
2058
2059/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
2060/// the packed maximum values.
2061///
2062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu8)
2063#[inline]
2064#[target_feature(enable = "avx2")]
2065#[cfg_attr(test, assert_instr(vpmaxub))]
2066#[stable(feature = "simd_x86", since = "1.27.0")]
2067#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2068pub const fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
2069 unsafe { simd_imax(a.as_u8x32(), b.as_u8x32()).as_m256i() }
2070}
2071
2072/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
2073/// minimum values.
2074///
2075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi16)
2076#[inline]
2077#[target_feature(enable = "avx2")]
2078#[cfg_attr(test, assert_instr(vpminsw))]
2079#[stable(feature = "simd_x86", since = "1.27.0")]
2080#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2081pub const fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
2082 unsafe { simd_imin(a.as_i16x16(), b.as_i16x16()).as_m256i() }
2083}
2084
2085/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
2086/// minimum values.
2087///
2088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi32)
2089#[inline]
2090#[target_feature(enable = "avx2")]
2091#[cfg_attr(test, assert_instr(vpminsd))]
2092#[stable(feature = "simd_x86", since = "1.27.0")]
2093#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2094pub const fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
2095 unsafe { simd_imin(a.as_i32x8(), b.as_i32x8()).as_m256i() }
2096}
2097
2098/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
2099/// minimum values.
2100///
2101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi8)
2102#[inline]
2103#[target_feature(enable = "avx2")]
2104#[cfg_attr(test, assert_instr(vpminsb))]
2105#[stable(feature = "simd_x86", since = "1.27.0")]
2106#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2107pub const fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
2108 unsafe { simd_imin(a.as_i8x32(), b.as_i8x32()).as_m256i() }
2109}
2110
2111/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
2112/// the packed minimum values.
2113///
2114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu16)
2115#[inline]
2116#[target_feature(enable = "avx2")]
2117#[cfg_attr(test, assert_instr(vpminuw))]
2118#[stable(feature = "simd_x86", since = "1.27.0")]
2119#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2120pub const fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
2121 unsafe { simd_imin(a.as_u16x16(), b.as_u16x16()).as_m256i() }
2122}
2123
2124/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
2125/// the packed minimum values.
2126///
2127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu32)
2128#[inline]
2129#[target_feature(enable = "avx2")]
2130#[cfg_attr(test, assert_instr(vpminud))]
2131#[stable(feature = "simd_x86", since = "1.27.0")]
2132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2133pub const fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
2134 unsafe { simd_imin(a.as_u32x8(), b.as_u32x8()).as_m256i() }
2135}
2136
2137/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
2138/// the packed minimum values.
2139///
2140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu8)
2141#[inline]
2142#[target_feature(enable = "avx2")]
2143#[cfg_attr(test, assert_instr(vpminub))]
2144#[stable(feature = "simd_x86", since = "1.27.0")]
2145#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2146pub const fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
2147 unsafe { simd_imin(a.as_u8x32(), b.as_u8x32()).as_m256i() }
2148}
2149
/// Creates a mask from the most significant bit of each 8-bit element in `a`
/// and returns the result.
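///
/// A minimal sketch, assuming `avx2` is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // Bytes 0 and 31 are negative, so only bits 0 and 31 of the mask are set.
/// let a = _mm256_setr_epi8(
///     -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
///     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1,
/// );
/// assert_eq!(_mm256_movemask_epi8(a), 0x8000_0001u32 as i32);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```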
2152///
2153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_epi8)
2154#[inline]
2155#[target_feature(enable = "avx2")]
2156#[cfg_attr(test, assert_instr(vpmovmskb))]
2157#[stable(feature = "simd_x86", since = "1.27.0")]
2158#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2159pub const fn _mm256_movemask_epi8(a: __m256i) -> i32 {
2160 unsafe {
        let z = i8x32::ZERO;
        let m: i8x32 = simd_lt(a.as_i8x32(), z);
2163 simd_bitmask::<_, u32>(m) as i32
2164 }
2165}
2166
2167/// Computes the sum of absolute differences (SADs) of quadruplets of unsigned
2168/// 8-bit integers in `a` compared to those in `b`, and stores the 16-bit
2169/// results in dst. Eight SADs are performed for each 128-bit lane using one
2170/// quadruplet from `b` and eight quadruplets from `a`. One quadruplet is
/// selected from `b` starting at the offset specified in `imm8`. Eight
2172/// quadruplets are formed from sequential 8-bit integers selected from `a`
2173/// starting at the offset specified in `imm8`.
2174///
2175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mpsadbw_epu8)
2176#[inline]
2177#[target_feature(enable = "avx2")]
2178#[cfg_attr(test, assert_instr(vmpsadbw, IMM8 = 0))]
2179#[rustc_legacy_const_generics(2)]
2180#[stable(feature = "simd_x86", since = "1.27.0")]
2181pub fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2182 static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8 as i8)) }
2184}
2185
2186/// Multiplies the low 32-bit integers from each packed 64-bit element in
2187/// `a` and `b`
2188///
2189/// Returns the 64-bit results.
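///
/// A minimal sketch, assuming `avx2` is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // Only the low 32 bits of each 64-bit element take part (lanes 0, 2, 4, 6
/// // below); the odd 32-bit lanes are ignored.
/// let a = _mm256_setr_epi32(1, 99, 2, 99, 3, 99, 4, 99);
/// let b = _mm256_setr_epi32(-5, 99, 6, 99, -7, 99, 8, 99);
/// let r = _mm256_mul_epi32(a, b);
/// let expected = _mm256_setr_epi64x(-5, 12, -21, 32);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi64(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```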
2190///
2191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_epi32)
2192#[inline]
2193#[target_feature(enable = "avx2")]
2194#[cfg_attr(test, assert_instr(vpmuldq))]
2195#[stable(feature = "simd_x86", since = "1.27.0")]
2196#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2197pub const fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
2198 unsafe {
        let a = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
        let b = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
        transmute(simd_mul(a, b))
2202 }
2203}
2204
2205/// Multiplies the low unsigned 32-bit integers from each packed 64-bit
2206/// element in `a` and `b`
2207///
2208/// Returns the unsigned 64-bit results.
2209///
2210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_epu32)
2211#[inline]
2212#[target_feature(enable = "avx2")]
2213#[cfg_attr(test, assert_instr(vpmuludq))]
2214#[stable(feature = "simd_x86", since = "1.27.0")]
2215#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2216pub const fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
2217 unsafe {
        let a = a.as_u64x4();
        let b = b.as_u64x4();
        let mask = u64x4::splat(u32::MAX as u64);
        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
2222 }
2223}
2224
2225/// Multiplies the packed 16-bit integers in `a` and `b`, producing
2226/// intermediate 32-bit integers and returning the high 16 bits of the
2227/// intermediate integers.
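///
/// A minimal sketch, assuming `avx2` is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // 1000 * 1000 = 1_000_000 = 0x000F_4240, whose high 16 bits are 15.
/// let r = _mm256_mulhi_epi16(_mm256_set1_epi16(1000), _mm256_set1_epi16(1000));
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi16(r, _mm256_set1_epi16(15))), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```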
2228///
2229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhi_epi16)
2230#[inline]
2231#[target_feature(enable = "avx2")]
2232#[cfg_attr(test, assert_instr(vpmulhw))]
2233#[stable(feature = "simd_x86", since = "1.27.0")]
2234#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2235pub const fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
2236 unsafe {
        let a = simd_cast::<_, i32x16>(a.as_i16x16());
        let b = simd_cast::<_, i32x16>(b.as_i16x16());
        let r = simd_shr(simd_mul(a, b), i32x16::splat(16));
        transmute(simd_cast::<i32x16, i16x16>(r))
2241 }
2242}
2243
2244/// Multiplies the packed unsigned 16-bit integers in `a` and `b`, producing
2245/// intermediate 32-bit integers and returning the high 16 bits of the
2246/// intermediate integers.
2247///
2248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhi_epu16)
2249#[inline]
2250#[target_feature(enable = "avx2")]
2251#[cfg_attr(test, assert_instr(vpmulhuw))]
2252#[stable(feature = "simd_x86", since = "1.27.0")]
2253#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2254pub const fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
2255 unsafe {
        let a = simd_cast::<_, u32x16>(a.as_u16x16());
        let b = simd_cast::<_, u32x16>(b.as_u16x16());
        let r = simd_shr(simd_mul(a, b), u32x16::splat(16));
        transmute(simd_cast::<u32x16, u16x16>(r))
2260 }
2261}
2262
2263/// Multiplies the packed 16-bit integers in `a` and `b`, producing
2264/// intermediate 32-bit integers, and returns the low 16 bits of the
2265/// intermediate integers
2266///
2267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi16)
2268#[inline]
2269#[target_feature(enable = "avx2")]
2270#[cfg_attr(test, assert_instr(vpmullw))]
2271#[stable(feature = "simd_x86", since = "1.27.0")]
2272#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2273pub const fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) }
2275}
2276
2277/// Multiplies the packed 32-bit integers in `a` and `b`, producing
2278/// intermediate 64-bit integers, and returns the low 32 bits of the
2279/// intermediate integers
2280///
2281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi32)
2282#[inline]
2283#[target_feature(enable = "avx2")]
2284#[cfg_attr(test, assert_instr(vpmulld))]
2285#[stable(feature = "simd_x86", since = "1.27.0")]
2286#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2287pub const fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) }
2289}
2290
2291/// Multiplies packed 16-bit integers in `a` and `b`, producing
2292/// intermediate signed 32-bit integers. Truncate each intermediate
2293/// integer to the 18 most significant bits, round by adding 1, and
2294/// return bits `[16:1]`.
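///
/// A minimal sketch treating the inputs as Q15 fixed-point values (assumes
/// `avx2` is detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // 0.5 * 0.5 in Q15: 16384 * 16384 rounds to 8192, i.e. 0.25.
/// let r = _mm256_mulhrs_epi16(_mm256_set1_epi16(16384), _mm256_set1_epi16(16384));
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi16(r, _mm256_set1_epi16(8192))), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```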
2295///
2296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhrs_epi16)
2297#[inline]
2298#[target_feature(enable = "avx2")]
2299#[cfg_attr(test, assert_instr(vpmulhrsw))]
2300#[stable(feature = "simd_x86", since = "1.27.0")]
2301pub fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16())) }
2303}
2304
2305/// Computes the bitwise OR of 256 bits (representing integer data) in `a`
2306/// and `b`
2307///
2308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_si256)
2309#[inline]
2310#[target_feature(enable = "avx2")]
2311#[cfg_attr(test, assert_instr(vorps))]
2312#[stable(feature = "simd_x86", since = "1.27.0")]
2313#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2314pub const fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
2316}
2317
2318/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
2319/// using signed saturation
2320///
2321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi16)
2322#[inline]
2323#[target_feature(enable = "avx2")]
2324#[cfg_attr(test, assert_instr(vpacksswb))]
2325#[stable(feature = "simd_x86", since = "1.27.0")]
2326pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packsswb(a.as_i16x16(), b.as_i16x16())) }
2328}
2329
2330/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
2331/// using signed saturation
2332///
2333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi32)
2334#[inline]
2335#[target_feature(enable = "avx2")]
2336#[cfg_attr(test, assert_instr(vpackssdw))]
2337#[stable(feature = "simd_x86", since = "1.27.0")]
2338pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packssdw(a.as_i32x8(), b.as_i32x8())) }
2340}
2341
2342/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
2343/// using unsigned saturation
2344///
2345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi16)
2346#[inline]
2347#[target_feature(enable = "avx2")]
2348#[cfg_attr(test, assert_instr(vpackuswb))]
2349#[stable(feature = "simd_x86", since = "1.27.0")]
2350pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packuswb(a.as_i16x16(), b.as_i16x16())) }
2352}
2353
2354/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
2355/// using unsigned saturation
2356///
2357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi32)
2358#[inline]
2359#[target_feature(enable = "avx2")]
2360#[cfg_attr(test, assert_instr(vpackusdw))]
2361#[stable(feature = "simd_x86", since = "1.27.0")]
2362pub fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packusdw(a.as_i32x8(), b.as_i32x8())) }
2364}
2365
2366/// Permutes packed 32-bit integers from `a` according to the content of `b`.
2367///
2368/// The last 3 bits of each integer of `b` are used as addresses into the 8
2369/// integers of `a`.
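///
/// A minimal sketch, assuming `avx2` is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(10, 11, 12, 13, 14, 15, 16, 17);
/// // Reverse the eight lanes.
/// let idx = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
/// let r = _mm256_permutevar8x32_epi32(a, idx);
/// let expected = _mm256_setr_epi32(17, 16, 15, 14, 13, 12, 11, 10);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi32(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```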
2370///
2371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_epi32)
2372#[inline]
2373#[target_feature(enable = "avx2")]
2374#[cfg_attr(test, assert_instr(vpermps))]
2375#[stable(feature = "simd_x86", since = "1.27.0")]
2376pub fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(permd(a.as_u32x8(), b.as_u32x8())) }
2378}
2379
2380/// Permutes 64-bit integers from `a` using control mask `imm8`.
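///
/// A minimal sketch, assuming `avx2` is detected at runtime; each 2-bit field
/// of the control selects the source lane for one destination lane:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi64x(10, 20, 30, 40);
/// // 0b00_01_10_11 picks lanes 3, 2, 1, 0, i.e. it reverses the vector.
/// let r = _mm256_permute4x64_epi64::<0b00_01_10_11>(a);
/// let expected = _mm256_setr_epi64x(40, 30, 20, 10);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi64(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```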
2381///
2382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute4x64_epi64)
2383#[inline]
2384#[target_feature(enable = "avx2")]
2385#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 9))]
2386#[rustc_legacy_const_generics(1)]
2387#[stable(feature = "simd_x86", since = "1.27.0")]
2388#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2389pub const fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2390 static_assert_uimm_bits!(IMM8, 8);
2391 unsafe {
        let zero = i64x4::ZERO;
2393 let r: i64x4 = simd_shuffle!(
2394 a.as_i64x4(),
2395 zero,
2396 [
2397 IMM8 as u32 & 0b11,
2398 (IMM8 as u32 >> 2) & 0b11,
2399 (IMM8 as u32 >> 4) & 0b11,
2400 (IMM8 as u32 >> 6) & 0b11,
2401 ],
2402 );
        transmute(r)
2404 }
2405}
2406
2407/// Shuffles 128-bits of integer data selected by `imm8` from `a` and `b`.
2408///
2409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2x128_si256)
2410#[inline]
2411#[target_feature(enable = "avx2")]
2412#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 9))]
2413#[rustc_legacy_const_generics(2)]
2414#[stable(feature = "simd_x86", since = "1.27.0")]
2415#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2416pub const fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2417 static_assert_uimm_bits!(IMM8, 8);
2418 _mm256_permute2f128_si256::<IMM8>(a, b)
2419}
2420
2421/// Shuffles 64-bit floating-point elements in `a` across lanes using the
2422/// control in `imm8`.
2423///
2424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute4x64_pd)
2425#[inline]
2426#[target_feature(enable = "avx2")]
2427#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 1))]
2428#[rustc_legacy_const_generics(1)]
2429#[stable(feature = "simd_x86", since = "1.27.0")]
2430#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2431pub const fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
2432 static_assert_uimm_bits!(IMM8, 8);
2433 unsafe {
2434 simd_shuffle!(
2435 a,
2436 _mm256_undefined_pd(),
2437 [
2438 IMM8 as u32 & 0b11,
2439 (IMM8 as u32 >> 2) & 0b11,
2440 (IMM8 as u32 >> 4) & 0b11,
2441 (IMM8 as u32 >> 6) & 0b11,
2442 ],
2443 )
2444 }
2445}
2446
2447/// Shuffles eight 32-bit floating-point elements in `a` across lanes using
2448/// the corresponding 32-bit integer index in `idx`.
2449///
2450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_ps)
2451#[inline]
2452#[target_feature(enable = "avx2")]
2453#[cfg_attr(test, assert_instr(vpermps))]
2454#[stable(feature = "simd_x86", since = "1.27.0")]
2455pub fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 {
    unsafe { permps(a, idx.as_i32x8()) }
2457}
2458
2459/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
2460/// and `b`, then horizontally sum each consecutive 8 differences to
2461/// produce four unsigned 16-bit integers, and pack these unsigned 16-bit
2462/// integers in the low 16 bits of the 64-bit return value
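///
/// A minimal sketch, assuming `avx2` is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi8(3);
/// let b = _mm256_set1_epi8(1);
/// // Each group of eight byte differences sums to 8 * |3 - 1| = 16.
/// let r = _mm256_sad_epu8(a, b);
/// let expected = _mm256_setr_epi64x(16, 16, 16, 16);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi64(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```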
2463///
2464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sad_epu8)
2465#[inline]
2466#[target_feature(enable = "avx2")]
2467#[cfg_attr(test, assert_instr(vpsadbw))]
2468#[stable(feature = "simd_x86", since = "1.27.0")]
2469pub fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psadbw(a.as_u8x32(), b.as_u8x32())) }
2471}
2472
2473/// Shuffles bytes from `a` according to the content of `b`.
2474///
2475/// For each of the 128-bit low and high halves of the vectors, the last
2476/// 4 bits of each byte of `b` are used as addresses into the respective
2477/// low or high 16 bytes of `a`. That is, the halves are shuffled separately.
2478///
2479/// In addition, if the highest significant bit of a byte of `b` is set, the
2480/// respective destination byte is set to 0.
2481///
2482/// Picturing `a` and `b` as `[u8; 32]`, `_mm256_shuffle_epi8` is logically
2483/// equivalent to:
2484///
2485/// ```
2486/// fn mm256_shuffle_epi8(a: [u8; 32], b: [u8; 32]) -> [u8; 32] {
2487/// let mut r = [0; 32];
2488/// for i in 0..16 {
2489/// // if the most significant bit of b is set,
2490/// // then the destination byte is set to 0.
2491/// if b[i] & 0x80 == 0u8 {
2492/// r[i] = a[(b[i] % 16) as usize];
2493/// }
2494/// if b[i + 16] & 0x80 == 0u8 {
2495/// r[i + 16] = a[(b[i + 16] % 16 + 16) as usize];
2496/// }
2497/// }
2498/// r
2499/// }
2500/// ```
2501///
2502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_epi8)
2503#[inline]
2504#[target_feature(enable = "avx2")]
2505#[cfg_attr(test, assert_instr(vpshufb))]
2506#[stable(feature = "simd_x86", since = "1.27.0")]
2507pub fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pshufb(a.as_u8x32(), b.as_u8x32())) }
2509}
2510
2511/// Shuffles 32-bit integers in 128-bit lanes of `a` using the control in
2512/// `imm8`.
2513///
2514/// ```rust
2515/// #[cfg(target_arch = "x86")]
2516/// use std::arch::x86::*;
2517/// #[cfg(target_arch = "x86_64")]
2518/// use std::arch::x86_64::*;
2519///
2520/// # fn main() {
2521/// # if is_x86_feature_detected!("avx2") {
2522/// # #[target_feature(enable = "avx2")]
2523/// # unsafe fn worker() {
2524/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
2525///
2526/// let c1 = _mm256_shuffle_epi32(a, 0b00_11_10_01);
2527/// let c2 = _mm256_shuffle_epi32(a, 0b01_00_10_11);
2528///
2529/// let expected1 = _mm256_setr_epi32(1, 2, 3, 0, 5, 6, 7, 4);
2530/// let expected2 = _mm256_setr_epi32(3, 2, 0, 1, 7, 6, 4, 5);
2531///
2532/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c1, expected1)), !0);
2533/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c2, expected2)), !0);
2534/// # }
2535/// # unsafe { worker(); }
2536/// # }
2537/// # }
2538/// ```
2539///
2540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_epi32)
2541#[inline]
2542#[target_feature(enable = "avx2")]
2543#[cfg_attr(test, assert_instr(vshufps, MASK = 9))]
2544#[rustc_legacy_const_generics(1)]
2545#[stable(feature = "simd_x86", since = "1.27.0")]
2546#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2547pub const fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
2548 static_assert_uimm_bits!(MASK, 8);
2549 unsafe {
2550 let r: i32x8 = simd_shuffle!(
2551 a.as_i32x8(),
2552 a.as_i32x8(),
2553 [
2554 MASK as u32 & 0b11,
2555 (MASK as u32 >> 2) & 0b11,
2556 (MASK as u32 >> 4) & 0b11,
2557 (MASK as u32 >> 6) & 0b11,
2558 (MASK as u32 & 0b11) + 4,
2559 ((MASK as u32 >> 2) & 0b11) + 4,
2560 ((MASK as u32 >> 4) & 0b11) + 4,
2561 ((MASK as u32 >> 6) & 0b11) + 4,
2562 ],
2563 );
        transmute(r)
2565 }
2566}
2567
2568/// Shuffles 16-bit integers in the high 64 bits of 128-bit lanes of `a` using
2569/// the control in `imm8`. The low 64 bits of 128-bit lanes of `a` are copied
2570/// to the output.
2571///
2572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shufflehi_epi16)
2573#[inline]
2574#[target_feature(enable = "avx2")]
2575#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 9))]
2576#[rustc_legacy_const_generics(1)]
2577#[stable(feature = "simd_x86", since = "1.27.0")]
2578#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2579pub const fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2580 static_assert_uimm_bits!(IMM8, 8);
2581 unsafe {
2582 let a = a.as_i16x16();
2583 let r: i16x16 = simd_shuffle!(
2584 a,
2585 a,
2586 [
2587 0,
2588 1,
2589 2,
2590 3,
2591 4 + (IMM8 as u32 & 0b11),
2592 4 + ((IMM8 as u32 >> 2) & 0b11),
2593 4 + ((IMM8 as u32 >> 4) & 0b11),
2594 4 + ((IMM8 as u32 >> 6) & 0b11),
2595 8,
2596 9,
2597 10,
2598 11,
2599 12 + (IMM8 as u32 & 0b11),
2600 12 + ((IMM8 as u32 >> 2) & 0b11),
2601 12 + ((IMM8 as u32 >> 4) & 0b11),
2602 12 + ((IMM8 as u32 >> 6) & 0b11),
2603 ],
2604 );
2605 transmute(r)
2606 }
2607}
2608
2609/// Shuffles 16-bit integers in the low 64 bits of 128-bit lanes of `a` using
2610/// the control in `imm8`. The high 64 bits of 128-bit lanes of `a` are copied
2611/// to the output.
2612///
2613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shufflelo_epi16)
2614#[inline]
2615#[target_feature(enable = "avx2")]
2616#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))]
2617#[rustc_legacy_const_generics(1)]
2618#[stable(feature = "simd_x86", since = "1.27.0")]
2619#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2620pub const fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2621 static_assert_uimm_bits!(IMM8, 8);
2622 unsafe {
2623 let a = a.as_i16x16();
2624 let r: i16x16 = simd_shuffle!(
2625 a,
2626 a,
2627 [
2628 0 + (IMM8 as u32 & 0b11),
2629 0 + ((IMM8 as u32 >> 2) & 0b11),
2630 0 + ((IMM8 as u32 >> 4) & 0b11),
2631 0 + ((IMM8 as u32 >> 6) & 0b11),
2632 4,
2633 5,
2634 6,
2635 7,
2636 8 + (IMM8 as u32 & 0b11),
2637 8 + ((IMM8 as u32 >> 2) & 0b11),
2638 8 + ((IMM8 as u32 >> 4) & 0b11),
2639 8 + ((IMM8 as u32 >> 6) & 0b11),
2640 12,
2641 13,
2642 14,
2643 15,
2644 ],
2645 );
2646 transmute(r)
2647 }
2648}
2649
2650/// Negates packed 16-bit integers in `a` when the corresponding signed
2651/// 16-bit integer in `b` is negative, and returns the results.
2652/// Results are zeroed out when the corresponding element in `b` is zero.
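///
/// A minimal sketch, assuming `avx2` is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(5);
/// let b = _mm256_setr_epi16(-1, 0, 1, -1, 0, 1, -1, 1, -1, 0, 1, -1, 0, 1, -1, 1);
/// let r = _mm256_sign_epi16(a, b);
/// let expected = _mm256_setr_epi16(-5, 0, 5, -5, 0, 5, -5, 5, -5, 0, 5, -5, 0, 5, -5, 5);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi16(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```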
2653///
2654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi16)
2655#[inline]
2656#[target_feature(enable = "avx2")]
2657#[cfg_attr(test, assert_instr(vpsignw))]
2658#[stable(feature = "simd_x86", since = "1.27.0")]
2659pub fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignw(a.as_i16x16(), b.as_i16x16())) }
2661}
2662
2663/// Negates packed 32-bit integers in `a` when the corresponding signed
2664/// 32-bit integer in `b` is negative, and returns the results.
2665/// Results are zeroed out when the corresponding element in `b` is zero.
2666///
2667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi32)
2668#[inline]
2669#[target_feature(enable = "avx2")]
2670#[cfg_attr(test, assert_instr(vpsignd))]
2671#[stable(feature = "simd_x86", since = "1.27.0")]
2672pub fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignd(a.as_i32x8(), b.as_i32x8())) }
2674}
2675
2676/// Negates packed 8-bit integers in `a` when the corresponding signed
2677/// 8-bit integer in `b` is negative, and returns the results.
2678/// Results are zeroed out when the corresponding element in `b` is zero.
2679///
2680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi8)
2681#[inline]
2682#[target_feature(enable = "avx2")]
2683#[cfg_attr(test, assert_instr(vpsignb))]
2684#[stable(feature = "simd_x86", since = "1.27.0")]
2685pub fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignb(a.as_i8x32(), b.as_i8x32())) }
2687}
2688
2689/// Shifts packed 16-bit integers in `a` left by `count` while
2690/// shifting in zeros, and returns the result
2691///
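/// A minimal sketch (illustrative values only), assuming `avx2` is detected
/// at runtime; the shift amount is taken from the low 64 bits of `count`:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(1);
/// // shift every element left by 4 bits
/// let count = _mm_cvtsi32_si128(4);
/// let r = _mm256_sll_epi16(a, count);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, _mm256_set1_epi16(16))), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///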
2692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi16)
2693#[inline]
2694#[target_feature(enable = "avx2")]
2695#[cfg_attr(test, assert_instr(vpsllw))]
2696#[stable(feature = "simd_x86", since = "1.27.0")]
2697pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psllw(a.as_i16x16(), count.as_i16x8())) }
2699}
2700
2701/// Shifts packed 32-bit integers in `a` left by `count` while
2702/// shifting in zeros, and returns the result
2703///
2704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi32)
2705#[inline]
2706#[target_feature(enable = "avx2")]
2707#[cfg_attr(test, assert_instr(vpslld))]
2708#[stable(feature = "simd_x86", since = "1.27.0")]
2709pub fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(pslld(a.as_i32x8(), count.as_i32x4())) }
2711}
2712
2713/// Shifts packed 64-bit integers in `a` left by `count` while
2714/// shifting in zeros, and returns the result
2715///
2716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi64)
2717#[inline]
2718#[target_feature(enable = "avx2")]
2719#[cfg_attr(test, assert_instr(vpsllq))]
2720#[stable(feature = "simd_x86", since = "1.27.0")]
2721pub fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psllq(a.as_i64x4(), count.as_i64x2())) }
2723}
2724
/// Shifts packed 16-bit integers in `a` left by `IMM8` while
/// shifting in zeros, and returns the results.
2727///
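/// A minimal sketch with illustrative values, assuming `avx2` is detected at
/// runtime:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(3);
/// let r = _mm256_slli_epi16::<4>(a);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, _mm256_set1_epi16(48))), !0);
/// // shift counts of 16 or more produce zero
/// let z = _mm256_slli_epi16::<16>(a);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(z, _mm256_setzero_si256())), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///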
2728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi16)
2729#[inline]
2730#[target_feature(enable = "avx2")]
2731#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 7))]
2732#[rustc_legacy_const_generics(1)]
2733#[stable(feature = "simd_x86", since = "1.27.0")]
2734#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2735pub const fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2736 static_assert_uimm_bits!(IMM8, 8);
2737 unsafe {
2738 if IMM8 >= 16 {
2739 _mm256_setzero_si256()
2740 } else {
            transmute(simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
2742 }
2743 }
2744}
2745
/// Shifts packed 32-bit integers in `a` left by `IMM8` while
/// shifting in zeros, and returns the results.
2748///
2749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi32)
2750#[inline]
2751#[target_feature(enable = "avx2")]
2752#[cfg_attr(test, assert_instr(vpslld, IMM8 = 7))]
2753#[rustc_legacy_const_generics(1)]
2754#[stable(feature = "simd_x86", since = "1.27.0")]
2755#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2756pub const fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2757 unsafe {
2758 static_assert_uimm_bits!(IMM8, 8);
2759 if IMM8 >= 32 {
2760 _mm256_setzero_si256()
2761 } else {
            transmute(simd_shl(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
2763 }
2764 }
2765}
2766
/// Shifts packed 64-bit integers in `a` left by `IMM8` while
/// shifting in zeros, and returns the results.
2769///
2770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi64)
2771#[inline]
2772#[target_feature(enable = "avx2")]
2773#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 7))]
2774#[rustc_legacy_const_generics(1)]
2775#[stable(feature = "simd_x86", since = "1.27.0")]
2776#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2777pub const fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2778 unsafe {
2779 static_assert_uimm_bits!(IMM8, 8);
2780 if IMM8 >= 64 {
2781 _mm256_setzero_si256()
2782 } else {
            transmute(simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
2784 }
2785 }
2786}
2787
2788/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
2789///
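/// A minimal sketch (illustrative values only) of the per-lane byte shift,
/// assuming `avx2` is detected at runtime:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi8(
///     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
///     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
/// );
/// // each 128-bit lane is shifted independently by 2 bytes
/// let r = _mm256_slli_si256::<2>(a);
/// let e = _mm256_setr_epi8(
///     0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
///     0, 0, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
/// );
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, e)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///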
2790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_si256)
2791#[inline]
2792#[target_feature(enable = "avx2")]
2793#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
2794#[rustc_legacy_const_generics(1)]
2795#[stable(feature = "simd_x86", since = "1.27.0")]
2796#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2797pub const fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2798 static_assert_uimm_bits!(IMM8, 8);
2799 _mm256_bslli_epi128::<IMM8>(a)
2800}
2801
2802/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
2803///
2804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bslli_epi128)
2805#[inline]
2806#[target_feature(enable = "avx2")]
2807#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
2808#[rustc_legacy_const_generics(1)]
2809#[stable(feature = "simd_x86", since = "1.27.0")]
2810#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2811pub const fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2812 static_assert_uimm_bits!(IMM8, 8);
2813 const fn mask(shift: i32, i: u32) -> u32 {
2814 let shift = shift as u32 & 0xff;
2815 if shift > 15 || i % 16 < shift {
2816 0
2817 } else {
2818 32 + (i - shift)
2819 }
2820 }
2821 unsafe {
2822 let a = a.as_i8x32();
2823 let r: i8x32 = simd_shuffle!(
2824 i8x32::ZERO,
2825 a,
2826 [
2827 mask(IMM8, 0),
2828 mask(IMM8, 1),
2829 mask(IMM8, 2),
2830 mask(IMM8, 3),
2831 mask(IMM8, 4),
2832 mask(IMM8, 5),
2833 mask(IMM8, 6),
2834 mask(IMM8, 7),
2835 mask(IMM8, 8),
2836 mask(IMM8, 9),
2837 mask(IMM8, 10),
2838 mask(IMM8, 11),
2839 mask(IMM8, 12),
2840 mask(IMM8, 13),
2841 mask(IMM8, 14),
2842 mask(IMM8, 15),
2843 mask(IMM8, 16),
2844 mask(IMM8, 17),
2845 mask(IMM8, 18),
2846 mask(IMM8, 19),
2847 mask(IMM8, 20),
2848 mask(IMM8, 21),
2849 mask(IMM8, 22),
2850 mask(IMM8, 23),
2851 mask(IMM8, 24),
2852 mask(IMM8, 25),
2853 mask(IMM8, 26),
2854 mask(IMM8, 27),
2855 mask(IMM8, 28),
2856 mask(IMM8, 29),
2857 mask(IMM8, 30),
2858 mask(IMM8, 31),
2859 ],
2860 );
2861 transmute(r)
2862 }
2863}
2864
2865/// Shifts packed 32-bit integers in `a` left by the amount
2866/// specified by the corresponding element in `count` while
2867/// shifting in zeros, and returns the result.
2868///
2869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi32)
2870#[inline]
2871#[target_feature(enable = "avx2")]
2872#[cfg_attr(test, assert_instr(vpsllvd))]
2873#[stable(feature = "simd_x86", since = "1.27.0")]
2874#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2875pub const fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
2876 unsafe {
        let count = count.as_u32x4();
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        let count = simd_select(no_overflow, count, u32x4::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
2881 }
2882}
2883
2884/// Shifts packed 32-bit integers in `a` left by the amount
2885/// specified by the corresponding element in `count` while
2886/// shifting in zeros, and returns the result.
2887///
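/// A short sketch with illustrative values, assuming `avx2` is detected at
/// runtime:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi32(1);
/// let count = _mm256_setr_epi32(0, 1, 2, 3, 4, 31, 32, 33);
/// let r = _mm256_sllv_epi32(a, count);
/// // shift counts of 32 or more produce zero
/// let e = _mm256_setr_epi32(1, 2, 4, 8, 16, i32::MIN, 0, 0);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, e)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///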
2888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi32)
2889#[inline]
2890#[target_feature(enable = "avx2")]
2891#[cfg_attr(test, assert_instr(vpsllvd))]
2892#[stable(feature = "simd_x86", since = "1.27.0")]
2893#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2894pub const fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
2895 unsafe {
        let count = count.as_u32x8();
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        let count = simd_select(no_overflow, count, u32x8::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
2900 }
2901}
2902
2903/// Shifts packed 64-bit integers in `a` left by the amount
2904/// specified by the corresponding element in `count` while
2905/// shifting in zeros, and returns the result.
2906///
2907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi64)
2908#[inline]
2909#[target_feature(enable = "avx2")]
2910#[cfg_attr(test, assert_instr(vpsllvq))]
2911#[stable(feature = "simd_x86", since = "1.27.0")]
2912#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2913pub const fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
2914 unsafe {
        let count = count.as_u64x2();
        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, count, u64x2::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
2919 }
2920}
2921
2922/// Shifts packed 64-bit integers in `a` left by the amount
2923/// specified by the corresponding element in `count` while
2924/// shifting in zeros, and returns the result.
2925///
2926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi64)
2927#[inline]
2928#[target_feature(enable = "avx2")]
2929#[cfg_attr(test, assert_instr(vpsllvq))]
2930#[stable(feature = "simd_x86", since = "1.27.0")]
2931#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2932pub const fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
2933 unsafe {
        let count = count.as_u64x4();
        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, count, u64x4::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
2938 }
2939}
2940
2941/// Shifts packed 16-bit integers in `a` right by `count` while
2942/// shifting in sign bits.
2943///
2944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi16)
2945#[inline]
2946#[target_feature(enable = "avx2")]
2947#[cfg_attr(test, assert_instr(vpsraw))]
2948#[stable(feature = "simd_x86", since = "1.27.0")]
2949pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psraw(a.as_i16x16(), count.as_i16x8())) }
2951}
2952
2953/// Shifts packed 32-bit integers in `a` right by `count` while
2954/// shifting in sign bits.
2955///
2956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi32)
2957#[inline]
2958#[target_feature(enable = "avx2")]
2959#[cfg_attr(test, assert_instr(vpsrad))]
2960#[stable(feature = "simd_x86", since = "1.27.0")]
2961pub fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrad(a.as_i32x8(), count.as_i32x4())) }
2963}
2964
2965/// Shifts packed 16-bit integers in `a` right by `IMM8` while
2966/// shifting in sign bits.
2967///
2968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi16)
2969#[inline]
2970#[target_feature(enable = "avx2")]
2971#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 7))]
2972#[rustc_legacy_const_generics(1)]
2973#[stable(feature = "simd_x86", since = "1.27.0")]
2974#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2975pub const fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2976 static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16))) }
2978}
2979
2980/// Shifts packed 32-bit integers in `a` right by `IMM8` while
2981/// shifting in sign bits.
2982///
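/// A minimal sketch (illustrative values only), assuming `avx2` is detected
/// at runtime:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(-32, 32, -8, 8, i32::MIN, -1, 0, 7);
/// let r = _mm256_srai_epi32::<2>(a);
/// // the sign bit is replicated into the vacated positions
/// let e = _mm256_setr_epi32(-8, 8, -2, 2, i32::MIN >> 2, -1, 0, 1);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, e)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///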
2983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi32)
2984#[inline]
2985#[target_feature(enable = "avx2")]
2986#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 7))]
2987#[rustc_legacy_const_generics(1)]
2988#[stable(feature = "simd_x86", since = "1.27.0")]
2989#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2990pub const fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2991 static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31)))) }
2993}
2994
2995/// Shifts packed 32-bit integers in `a` right by the amount specified by the
2996/// corresponding element in `count` while shifting in sign bits.
2997///
2998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi32)
2999#[inline]
3000#[target_feature(enable = "avx2")]
3001#[cfg_attr(test, assert_instr(vpsravd))]
3002#[stable(feature = "simd_x86", since = "1.27.0")]
3003#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3004pub const fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
3005 unsafe {
        let count = count.as_u32x4();
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        let count = simd_select(no_overflow, transmute(count), i32x4::splat(31));
        simd_shr(a.as_i32x4(), count).as_m128i()
3010 }
3011}
3012
3013/// Shifts packed 32-bit integers in `a` right by the amount specified by the
3014/// corresponding element in `count` while shifting in sign bits.
3015///
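/// A short sketch with illustrative values, assuming `avx2` is detected at
/// runtime:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(-8, -8, 16, 16, -1, i32::MIN, 5, -5);
/// let count = _mm256_setr_epi32(1, 40, 2, 100, 3, 1, 0, 1);
/// let r = _mm256_srav_epi32(a, count);
/// // out-of-range counts fill the element with copies of its sign bit
/// let e = _mm256_setr_epi32(-4, -1, 4, 0, -1, i32::MIN >> 1, 5, -3);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, e)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///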
3016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi32)
3017#[inline]
3018#[target_feature(enable = "avx2")]
3019#[cfg_attr(test, assert_instr(vpsravd))]
3020#[stable(feature = "simd_x86", since = "1.27.0")]
3021#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3022pub const fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
3023 unsafe {
        let count = count.as_u32x8();
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        let count = simd_select(no_overflow, transmute(count), i32x8::splat(31));
        simd_shr(a.as_i32x8(), count).as_m256i()
3028 }
3029}
3030
3031/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
3032///
3033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_si256)
3034#[inline]
3035#[target_feature(enable = "avx2")]
3036#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
3037#[rustc_legacy_const_generics(1)]
3038#[stable(feature = "simd_x86", since = "1.27.0")]
3039#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3040pub const fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
3041 static_assert_uimm_bits!(IMM8, 8);
3042 _mm256_bsrli_epi128::<IMM8>(a)
3043}
3044
3045/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
3046///
3047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bsrli_epi128)
3048#[inline]
3049#[target_feature(enable = "avx2")]
3050#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
3051#[rustc_legacy_const_generics(1)]
3052#[stable(feature = "simd_x86", since = "1.27.0")]
3053#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3054pub const fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
3055 static_assert_uimm_bits!(IMM8, 8);
3056 const fn mask(shift: i32, i: u32) -> u32 {
3057 let shift = shift as u32 & 0xff;
3058 if shift > 15 || (15 - (i % 16)) < shift {
3059 0
3060 } else {
3061 32 + (i + shift)
3062 }
3063 }
3064 unsafe {
3065 let a = a.as_i8x32();
3066 let r: i8x32 = simd_shuffle!(
3067 i8x32::ZERO,
3068 a,
3069 [
3070 mask(IMM8, 0),
3071 mask(IMM8, 1),
3072 mask(IMM8, 2),
3073 mask(IMM8, 3),
3074 mask(IMM8, 4),
3075 mask(IMM8, 5),
3076 mask(IMM8, 6),
3077 mask(IMM8, 7),
3078 mask(IMM8, 8),
3079 mask(IMM8, 9),
3080 mask(IMM8, 10),
3081 mask(IMM8, 11),
3082 mask(IMM8, 12),
3083 mask(IMM8, 13),
3084 mask(IMM8, 14),
3085 mask(IMM8, 15),
3086 mask(IMM8, 16),
3087 mask(IMM8, 17),
3088 mask(IMM8, 18),
3089 mask(IMM8, 19),
3090 mask(IMM8, 20),
3091 mask(IMM8, 21),
3092 mask(IMM8, 22),
3093 mask(IMM8, 23),
3094 mask(IMM8, 24),
3095 mask(IMM8, 25),
3096 mask(IMM8, 26),
3097 mask(IMM8, 27),
3098 mask(IMM8, 28),
3099 mask(IMM8, 29),
3100 mask(IMM8, 30),
3101 mask(IMM8, 31),
3102 ],
3103 );
3104 transmute(r)
3105 }
3106}
3107
3108/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
3109/// zeros.
3110///
3111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi16)
3112#[inline]
3113#[target_feature(enable = "avx2")]
3114#[cfg_attr(test, assert_instr(vpsrlw))]
3115#[stable(feature = "simd_x86", since = "1.27.0")]
3116pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrlw(a.as_i16x16(), count.as_i16x8())) }
3118}
3119
3120/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
3121/// zeros.
3122///
3123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi32)
3124#[inline]
3125#[target_feature(enable = "avx2")]
3126#[cfg_attr(test, assert_instr(vpsrld))]
3127#[stable(feature = "simd_x86", since = "1.27.0")]
3128pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrld(a.as_i32x8(), count.as_i32x4())) }
3130}
3131
3132/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
3133/// zeros.
3134///
3135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi64)
3136#[inline]
3137#[target_feature(enable = "avx2")]
3138#[cfg_attr(test, assert_instr(vpsrlq))]
3139#[stable(feature = "simd_x86", since = "1.27.0")]
3140pub fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrlq(a.as_i64x4(), count.as_i64x2())) }
3142}
3143
/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
3146///
3147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi16)
3148#[inline]
3149#[target_feature(enable = "avx2")]
3150#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 7))]
3151#[rustc_legacy_const_generics(1)]
3152#[stable(feature = "simd_x86", since = "1.27.0")]
3153#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3154pub const fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
3155 static_assert_uimm_bits!(IMM8, 8);
3156 unsafe {
3157 if IMM8 >= 16 {
3158 _mm256_setzero_si256()
3159 } else {
            transmute(simd_shr(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
3161 }
3162 }
3163}
3164
/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
3167///
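/// A minimal sketch (illustrative values only), assuming `avx2` is detected
/// at runtime:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(-1, 256, 7, 1024, i32::MIN, 64, 0, 2);
/// let r = _mm256_srli_epi32::<4>(a);
/// // zeros are shifted in from the left, regardless of the sign bit
/// let e = _mm256_setr_epi32(0x0FFF_FFFF, 16, 0, 64, 0x0800_0000, 4, 0, 0);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, e)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///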
3168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi32)
3169#[inline]
3170#[target_feature(enable = "avx2")]
3171#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 7))]
3172#[rustc_legacy_const_generics(1)]
3173#[stable(feature = "simd_x86", since = "1.27.0")]
3174#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3175pub const fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
3176 static_assert_uimm_bits!(IMM8, 8);
3177 unsafe {
3178 if IMM8 >= 32 {
3179 _mm256_setzero_si256()
3180 } else {
            transmute(simd_shr(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
3182 }
3183 }
3184}
3185
/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
3188///
3189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi64)
3190#[inline]
3191#[target_feature(enable = "avx2")]
3192#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 7))]
3193#[rustc_legacy_const_generics(1)]
3194#[stable(feature = "simd_x86", since = "1.27.0")]
3195#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3196pub const fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
3197 static_assert_uimm_bits!(IMM8, 8);
3198 unsafe {
3199 if IMM8 >= 64 {
3200 _mm256_setzero_si256()
3201 } else {
            transmute(simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
3203 }
3204 }
3205}
3206
3207/// Shifts packed 32-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros.
3209///
3210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi32)
3211#[inline]
3212#[target_feature(enable = "avx2")]
3213#[cfg_attr(test, assert_instr(vpsrlvd))]
3214#[stable(feature = "simd_x86", since = "1.27.0")]
3215#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3216pub const fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
3217 unsafe {
        let count = count.as_u32x4();
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        let count = simd_select(no_overflow, count, u32x4::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
3222 }
3223}
3224
3225/// Shifts packed 32-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros.
3227///
3228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi32)
3229#[inline]
3230#[target_feature(enable = "avx2")]
3231#[cfg_attr(test, assert_instr(vpsrlvd))]
3232#[stable(feature = "simd_x86", since = "1.27.0")]
3233#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3234pub const fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
3235 unsafe {
        let count = count.as_u32x8();
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        let count = simd_select(no_overflow, count, u32x8::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
3240 }
3241}
3242
3243/// Shifts packed 64-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros.
3245///
3246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi64)
3247#[inline]
3248#[target_feature(enable = "avx2")]
3249#[cfg_attr(test, assert_instr(vpsrlvq))]
3250#[stable(feature = "simd_x86", since = "1.27.0")]
3251#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3252pub const fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
3253 unsafe {
        let count = count.as_u64x2();
        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, count, u64x2::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
3258 }
3259}
3260
3261/// Shifts packed 64-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros.
3263///
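/// A short sketch with illustrative values, assuming `avx2` is detected at
/// runtime:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi64x(-1, 64, 128, -1);
/// let count = _mm256_setr_epi64x(63, 2, 70, 0);
/// let r = _mm256_srlv_epi64(a, count);
/// // shift counts of 64 or more produce zero
/// let e = _mm256_setr_epi64x(1, 16, 0, -1);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, e)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///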
3264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi64)
3265#[inline]
3266#[target_feature(enable = "avx2")]
3267#[cfg_attr(test, assert_instr(vpsrlvq))]
3268#[stable(feature = "simd_x86", since = "1.27.0")]
3269#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3270pub const fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
3271 unsafe {
        let count = count.as_u64x4();
        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, count, u64x4::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
3276 }
3277}
3278
/// Loads 256 bits of integer data from memory into the returned vector using a non-temporal
/// memory hint. `mem_addr` must be aligned on a 32-byte boundary or a general-protection
/// exception may be generated. To minimize caching, the data is flagged as non-temporal
/// (unlikely to be used again soon).
3282///
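/// A minimal sketch; the `Aligned` wrapper type below is only for
/// illustration of the 32-byte alignment requirement, and `avx2` is assumed
/// to be detected at runtime:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// #[repr(align(32))]
/// struct Aligned([i32; 8]);
///
/// let data = Aligned([0, 1, 2, 3, 4, 5, 6, 7]);
/// // SAFETY: `data` is 32-byte aligned and valid for 32 bytes of reads.
/// let v = unsafe { _mm256_stream_load_si256(data.0.as_ptr() as *const __m256i) };
/// let e = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, e)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///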
3283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_load_si256)
3284#[inline]
3285#[target_feature(enable = "avx2")]
3286#[cfg_attr(test, assert_instr(vmovntdqa))]
3287#[stable(feature = "simd_x86_updates", since = "1.82.0")]
3288pub unsafe fn _mm256_stream_load_si256(mem_addr: *const __m256i) -> __m256i {
3289 let dst: __m256i;
3290 crate::arch::asm!(
3291 vpl!("vmovntdqa {a}"),
3292 a = out(ymm_reg) dst,
3293 p = in(reg) mem_addr,
3294 options(pure, readonly, nostack, preserves_flags),
3295 );
3296 dst
3297}
3298
3299/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
3300///
3301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi16)
3302#[inline]
3303#[target_feature(enable = "avx2")]
3304#[cfg_attr(test, assert_instr(vpsubw))]
3305#[stable(feature = "simd_x86", since = "1.27.0")]
3306#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3307pub const fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i16x16(), b.as_i16x16())) }
3309}
3310
3311/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`
3312///
3313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi32)
3314#[inline]
3315#[target_feature(enable = "avx2")]
3316#[cfg_attr(test, assert_instr(vpsubd))]
3317#[stable(feature = "simd_x86", since = "1.27.0")]
3318#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3319pub const fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i32x8(), b.as_i32x8())) }
3321}
3322
3323/// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`
3324///
3325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi64)
3326#[inline]
3327#[target_feature(enable = "avx2")]
3328#[cfg_attr(test, assert_instr(vpsubq))]
3329#[stable(feature = "simd_x86", since = "1.27.0")]
3330#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3331pub const fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i64x4(), b.as_i64x4())) }
3333}
3334
3335/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
3336///
3337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi8)
3338#[inline]
3339#[target_feature(enable = "avx2")]
3340#[cfg_attr(test, assert_instr(vpsubb))]
3341#[stable(feature = "simd_x86", since = "1.27.0")]
3342#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3343pub const fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i8x32(), b.as_i8x32())) }
3345}
3346
3347/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in
3348/// `a` using saturation.
3349///
3350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epi16)
3351#[inline]
3352#[target_feature(enable = "avx2")]
3353#[cfg_attr(test, assert_instr(vpsubsw))]
3354#[stable(feature = "simd_x86", since = "1.27.0")]
3355#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3356pub const fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16())) }
3358}
3359
3360/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in
3361/// `a` using saturation.
3362///
3363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epi8)
3364#[inline]
3365#[target_feature(enable = "avx2")]
3366#[cfg_attr(test, assert_instr(vpsubsb))]
3367#[stable(feature = "simd_x86", since = "1.27.0")]
3368#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3369pub const fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32())) }
3371}
3372
3373/// Subtract packed unsigned 16-bit integers in `b` from packed 16-bit
3374/// integers in `a` using saturation.
3375///
3376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epu16)
3377#[inline]
3378#[target_feature(enable = "avx2")]
3379#[cfg_attr(test, assert_instr(vpsubusw))]
3380#[stable(feature = "simd_x86", since = "1.27.0")]
3381#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3382pub const fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16())) }
3384}
3385
3386/// Subtract packed unsigned 8-bit integers in `b` from packed 8-bit
3387/// integers in `a` using saturation.
3388///
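/// A minimal sketch with illustrative values, assuming `avx2` is detected at
/// runtime:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi8(5);
/// let b = _mm256_set1_epi8(9);
/// // 5 - 9 saturates to 0 when the bytes are treated as unsigned
/// let r = _mm256_subs_epu8(a, b);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, _mm256_setzero_si256())), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///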
3389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epu8)
3390#[inline]
3391#[target_feature(enable = "avx2")]
3392#[cfg_attr(test, assert_instr(vpsubusb))]
3393#[stable(feature = "simd_x86", since = "1.27.0")]
3394#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3395pub const fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32())) }
3397}
3398
/// Unpacks and interleaves 8-bit integers from the high half of each
/// 128-bit lane of `a` and `b`.
3401///
3402/// ```rust
3403/// #[cfg(target_arch = "x86")]
3404/// use std::arch::x86::*;
3405/// #[cfg(target_arch = "x86_64")]
3406/// use std::arch::x86_64::*;
3407///
3408/// # fn main() {
3409/// # if is_x86_feature_detected!("avx2") {
3410/// # #[target_feature(enable = "avx2")]
3411/// # unsafe fn worker() {
3412/// let a = _mm256_setr_epi8(
3413/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
3414/// 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3415/// );
3416/// let b = _mm256_setr_epi8(
3417/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
3418/// -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
3419/// -30, -31,
3420/// );
3421///
3422/// let c = _mm256_unpackhi_epi8(a, b);
3423///
3424/// let expected = _mm256_setr_epi8(
3425/// 8, -8, 9, -9, 10, -10, 11, -11, 12, -12, 13, -13, 14, -14, 15, -15,
3426/// 24, -24, 25, -25, 26, -26, 27, -27, 28, -28, 29, -29, 30, -30, 31,
3427/// -31,
3428/// );
3429/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3430///
3431/// # }
3432/// # unsafe { worker(); }
3433/// # }
3434/// # }
3435/// ```
3436///
3437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi8)
3438#[inline]
3439#[target_feature(enable = "avx2")]
3440#[cfg_attr(test, assert_instr(vpunpckhbw))]
3441#[stable(feature = "simd_x86", since = "1.27.0")]
3442#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3443pub const fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
3444 unsafe {
3445 #[rustfmt::skip]
3446 let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3447 8, 40, 9, 41, 10, 42, 11, 43,
3448 12, 44, 13, 45, 14, 46, 15, 47,
3449 24, 56, 25, 57, 26, 58, 27, 59,
3450 28, 60, 29, 61, 30, 62, 31, 63,
3451 ]);
        transmute(r)
3453 }
3454}
3455
/// Unpacks and interleaves 8-bit integers from the low half of each
3457/// 128-bit lane of `a` and `b`.
3458///
3459/// ```rust
3460/// #[cfg(target_arch = "x86")]
3461/// use std::arch::x86::*;
3462/// #[cfg(target_arch = "x86_64")]
3463/// use std::arch::x86_64::*;
3464///
3465/// # fn main() {
3466/// # if is_x86_feature_detected!("avx2") {
3467/// # #[target_feature(enable = "avx2")]
3468/// # unsafe fn worker() {
3469/// let a = _mm256_setr_epi8(
3470/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
3471/// 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3472/// );
3473/// let b = _mm256_setr_epi8(
3474/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
3475/// -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
3476/// -30, -31,
3477/// );
3478///
3479/// let c = _mm256_unpacklo_epi8(a, b);
3480///
3481/// let expected = _mm256_setr_epi8(
3482/// 0, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 16, -16, 17,
3483/// -17, 18, -18, 19, -19, 20, -20, 21, -21, 22, -22, 23, -23,
3484/// );
3485/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3486///
3487/// # }
3488/// # unsafe { worker(); }
3489/// # }
3490/// # }
3491/// ```
3492///
3493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi8)
3494#[inline]
3495#[target_feature(enable = "avx2")]
3496#[cfg_attr(test, assert_instr(vpunpcklbw))]
3497#[stable(feature = "simd_x86", since = "1.27.0")]
3498#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3499pub const fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
3500 unsafe {
3501 #[rustfmt::skip]
3502 let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3503 0, 32, 1, 33, 2, 34, 3, 35,
3504 4, 36, 5, 37, 6, 38, 7, 39,
3505 16, 48, 17, 49, 18, 50, 19, 51,
3506 20, 52, 21, 53, 22, 54, 23, 55,
3507 ]);
        transmute(r)
3509 }
3510}
3511
/// Unpacks and interleaves 16-bit integers from the high half of each
3513/// 128-bit lane of `a` and `b`.
3514///
3515/// ```rust
3516/// #[cfg(target_arch = "x86")]
3517/// use std::arch::x86::*;
3518/// #[cfg(target_arch = "x86_64")]
3519/// use std::arch::x86_64::*;
3520///
3521/// # fn main() {
3522/// # if is_x86_feature_detected!("avx2") {
3523/// # #[target_feature(enable = "avx2")]
3524/// # unsafe fn worker() {
3525/// let a = _mm256_setr_epi16(
3526/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3527/// );
3528/// let b = _mm256_setr_epi16(
3529/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
3530/// );
3531///
3532/// let c = _mm256_unpackhi_epi16(a, b);
3533///
3534/// let expected = _mm256_setr_epi16(
3535/// 4, -4, 5, -5, 6, -6, 7, -7, 12, -12, 13, -13, 14, -14, 15, -15,
3536/// );
3537/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3538///
3539/// # }
3540/// # unsafe { worker(); }
3541/// # }
3542/// # }
3543/// ```
3544///
3545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi16)
3546#[inline]
3547#[target_feature(enable = "avx2")]
3548#[cfg_attr(test, assert_instr(vpunpckhwd))]
3549#[stable(feature = "simd_x86", since = "1.27.0")]
3550#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3551pub const fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
3552 unsafe {
3553 let r: i16x16 = simd_shuffle!(
3554 a.as_i16x16(),
3555 b.as_i16x16(),
3556 [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
3557 );
        transmute(r)
3559 }
3560}
3561
/// Unpacks and interleaves 16-bit integers from the low half of each
3563/// 128-bit lane of `a` and `b`.
3564///
3565/// ```rust
3566/// #[cfg(target_arch = "x86")]
3567/// use std::arch::x86::*;
3568/// #[cfg(target_arch = "x86_64")]
3569/// use std::arch::x86_64::*;
3570///
3571/// # fn main() {
3572/// # if is_x86_feature_detected!("avx2") {
3573/// # #[target_feature(enable = "avx2")]
3574/// # unsafe fn worker() {
3575///
3576/// let a = _mm256_setr_epi16(
3577/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3578/// );
3579/// let b = _mm256_setr_epi16(
3580/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
3581/// );
3582///
3583/// let c = _mm256_unpacklo_epi16(a, b);
3584///
3585/// let expected = _mm256_setr_epi16(
3586/// 0, 0, 1, -1, 2, -2, 3, -3, 8, -8, 9, -9, 10, -10, 11, -11,
3587/// );
3588/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3589///
3590/// # }
3591/// # unsafe { worker(); }
3592/// # }
3593/// # }
3594/// ```
3595///
3596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi16)
3597#[inline]
3598#[target_feature(enable = "avx2")]
3599#[cfg_attr(test, assert_instr(vpunpcklwd))]
3600#[stable(feature = "simd_x86", since = "1.27.0")]
3601#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3602pub const fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
3603 unsafe {
3604 let r: i16x16 = simd_shuffle!(
3605 a.as_i16x16(),
3606 b.as_i16x16(),
3607 [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
3608 );
        transmute(r)
3610 }
3611}
3612
/// Unpacks and interleaves 32-bit integers from the high half of each
3614/// 128-bit lane of `a` and `b`.
3615///
3616/// ```rust
3617/// #[cfg(target_arch = "x86")]
3618/// use std::arch::x86::*;
3619/// #[cfg(target_arch = "x86_64")]
3620/// use std::arch::x86_64::*;
3621///
3622/// # fn main() {
3623/// # if is_x86_feature_detected!("avx2") {
3624/// # #[target_feature(enable = "avx2")]
3625/// # unsafe fn worker() {
3626/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
3627/// let b = _mm256_setr_epi32(0, -1, -2, -3, -4, -5, -6, -7);
3628///
3629/// let c = _mm256_unpackhi_epi32(a, b);
3630///
3631/// let expected = _mm256_setr_epi32(2, -2, 3, -3, 6, -6, 7, -7);
3632/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3633///
3634/// # }
3635/// # unsafe { worker(); }
3636/// # }
3637/// # }
3638/// ```
3639///
3640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi32)
3641#[inline]
3642#[target_feature(enable = "avx2")]
3643#[cfg_attr(test, assert_instr(vunpckhps))]
3644#[stable(feature = "simd_x86", since = "1.27.0")]
3645#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3646pub const fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
3647 unsafe {
3648 let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
        transmute(r)
3650 }
3651}
3652
/// Unpacks and interleaves 32-bit integers from the low half of each
3654/// 128-bit lane of `a` and `b`.
3655///
3656/// ```rust
3657/// #[cfg(target_arch = "x86")]
3658/// use std::arch::x86::*;
3659/// #[cfg(target_arch = "x86_64")]
3660/// use std::arch::x86_64::*;
3661///
3662/// # fn main() {
3663/// # if is_x86_feature_detected!("avx2") {
3664/// # #[target_feature(enable = "avx2")]
3665/// # unsafe fn worker() {
3666/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
3667/// let b = _mm256_setr_epi32(0, -1, -2, -3, -4, -5, -6, -7);
3668///
3669/// let c = _mm256_unpacklo_epi32(a, b);
3670///
3671/// let expected = _mm256_setr_epi32(0, 0, 1, -1, 4, -4, 5, -5);
3672/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3673///
3674/// # }
3675/// # unsafe { worker(); }
3676/// # }
3677/// # }
3678/// ```
3679///
3680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi32)
3681#[inline]
3682#[target_feature(enable = "avx2")]
3683#[cfg_attr(test, assert_instr(vunpcklps))]
3684#[stable(feature = "simd_x86", since = "1.27.0")]
3685#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3686pub const fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
3687 unsafe {
3688 let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
        transmute(r)
3690 }
3691}
3692
/// Unpacks and interleaves 64-bit integers from the high half of each
3694/// 128-bit lane of `a` and `b`.
3695///
3696/// ```rust
3697/// #[cfg(target_arch = "x86")]
3698/// use std::arch::x86::*;
3699/// #[cfg(target_arch = "x86_64")]
3700/// use std::arch::x86_64::*;
3701///
3702/// # fn main() {
3703/// # if is_x86_feature_detected!("avx2") {
3704/// # #[target_feature(enable = "avx2")]
3705/// # unsafe fn worker() {
3706/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
3707/// let b = _mm256_setr_epi64x(0, -1, -2, -3);
3708///
3709/// let c = _mm256_unpackhi_epi64(a, b);
3710///
3711/// let expected = _mm256_setr_epi64x(1, -1, 3, -3);
3712/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3713///
3714/// # }
3715/// # unsafe { worker(); }
3716/// # }
3717/// # }
3718/// ```
3719///
3720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi64)
3721#[inline]
3722#[target_feature(enable = "avx2")]
3723#[cfg_attr(test, assert_instr(vunpckhpd))]
3724#[stable(feature = "simd_x86", since = "1.27.0")]
3725#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3726pub const fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
3727 unsafe {
3728 let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
        transmute(r)
3730 }
3731}
3732
/// Unpacks and interleaves 64-bit integers from the low half of each
3734/// 128-bit lane of `a` and `b`.
3735///
3736/// ```rust
3737/// #[cfg(target_arch = "x86")]
3738/// use std::arch::x86::*;
3739/// #[cfg(target_arch = "x86_64")]
3740/// use std::arch::x86_64::*;
3741///
3742/// # fn main() {
3743/// # if is_x86_feature_detected!("avx2") {
3744/// # #[target_feature(enable = "avx2")]
3745/// # unsafe fn worker() {
3746/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
3747/// let b = _mm256_setr_epi64x(0, -1, -2, -3);
3748///
3749/// let c = _mm256_unpacklo_epi64(a, b);
3750///
3751/// let expected = _mm256_setr_epi64x(0, 0, 2, -2);
3752/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3753///
3754/// # }
3755/// # unsafe { worker(); }
3756/// # }
3757/// # }
3758/// ```
3759///
3760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi64)
3761#[inline]
3762#[target_feature(enable = "avx2")]
3763#[cfg_attr(test, assert_instr(vunpcklpd))]
3764#[stable(feature = "simd_x86", since = "1.27.0")]
3765#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3766pub const fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
3767 unsafe {
3768 let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
        transmute(r)
3770 }
3771}
3772
3773/// Computes the bitwise XOR of 256 bits (representing integer data)
3774/// in `a` and `b`
3775///
3776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_si256)
3777#[inline]
3778#[target_feature(enable = "avx2")]
3779#[cfg_attr(test, assert_instr(vxorps))]
3780#[stable(feature = "simd_x86", since = "1.27.0")]
3781#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3782pub const fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
3784}
3785
3786/// Extracts an 8-bit integer from `a`, selected with `INDEX`. Returns a 32-bit
3787/// integer containing the zero-extended integer data.
3788///
3789/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
3790///
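/// A minimal sketch showing the zero extension, assuming `avx2` is detected
/// at runtime:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi8(-1);
/// // the byte is zero-extended, so -1 comes back as 255
/// assert_eq!(_mm256_extract_epi8::<7>(a), 255);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///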
3791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi8)
3792#[inline]
3793#[target_feature(enable = "avx2")]
3794// This intrinsic has no corresponding instruction.
3795#[rustc_legacy_const_generics(1)]
3796#[stable(feature = "simd_x86", since = "1.27.0")]
3797#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3798pub const fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
3799 static_assert_uimm_bits!(INDEX, 5);
3800 unsafe { simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32 }
3801}
3802
3803/// Extracts a 16-bit integer from `a`, selected with `INDEX`. Returns a 32-bit
3804/// integer containing the zero-extended integer data.
3805///
3806/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
3807///
3808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi16)
3809#[inline]
3810#[target_feature(enable = "avx2")]
3811// This intrinsic has no corresponding instruction.
3812#[rustc_legacy_const_generics(1)]
3813#[stable(feature = "simd_x86", since = "1.27.0")]
3814#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3815pub const fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
3816 static_assert_uimm_bits!(INDEX, 4);
3817 unsafe { simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 }
3818}
3819
#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.avx2.pmadd.wd"]
    unsafe fn pmaddwd(a: i16x16, b: i16x16) -> i32x8;
    #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
    unsafe fn pmaddubsw(a: u8x32, b: i8x32) -> i16x16;
    #[link_name = "llvm.x86.avx2.mpsadbw"]
    unsafe fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
    #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
    unsafe fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.packsswb"]
    unsafe fn packsswb(a: i16x16, b: i16x16) -> i8x32;
    #[link_name = "llvm.x86.avx2.packssdw"]
    unsafe fn packssdw(a: i32x8, b: i32x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.packuswb"]
    unsafe fn packuswb(a: i16x16, b: i16x16) -> u8x32;
    #[link_name = "llvm.x86.avx2.packusdw"]
    unsafe fn packusdw(a: i32x8, b: i32x8) -> u16x16;
    #[link_name = "llvm.x86.avx2.psad.bw"]
    unsafe fn psadbw(a: u8x32, b: u8x32) -> u64x4;
    #[link_name = "llvm.x86.avx2.psign.b"]
    unsafe fn psignb(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx2.psign.w"]
    unsafe fn psignw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.psign.d"]
    unsafe fn psignd(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.psll.w"]
    unsafe fn psllw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psll.d"]
    unsafe fn pslld(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psll.q"]
    unsafe fn psllq(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx2.psra.w"]
    unsafe fn psraw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psra.d"]
    unsafe fn psrad(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrl.w"]
    unsafe fn psrlw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psrl.d"]
    unsafe fn psrld(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrl.q"]
    unsafe fn psrlq(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx2.pshuf.b"]
    unsafe fn pshufb(a: u8x32, b: u8x32) -> u8x32;
    #[link_name = "llvm.x86.avx2.permd"]
    unsafe fn permd(a: u32x8, b: u32x8) -> u32x8;
    #[link_name = "llvm.x86.avx2.permps"]
    unsafe fn permps(a: __m256, b: i32x8) -> __m256;
    #[link_name = "llvm.x86.avx2.gather.d.d"]
    unsafe fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.d.d.256"]
    unsafe fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8;
    #[link_name = "llvm.x86.avx2.gather.d.q"]
    unsafe fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2;
    #[link_name = "llvm.x86.avx2.gather.d.q.256"]
    unsafe fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.q.d"]
    unsafe fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.q.d.256"]
    unsafe fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.q.q"]
    unsafe fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2;
    #[link_name = "llvm.x86.avx2.gather.q.q.256"]
    unsafe fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.d.pd"]
    unsafe fn pgatherdpd(
        src: __m128d,
        slice: *const i8,
        offsets: i32x4,
        mask: __m128d,
        scale: i8,
    ) -> __m128d;
    #[link_name = "llvm.x86.avx2.gather.d.pd.256"]
    unsafe fn vpgatherdpd(
        src: __m256d,
        slice: *const i8,
        offsets: i32x4,
        mask: __m256d,
        scale: i8,
    ) -> __m256d;
    #[link_name = "llvm.x86.avx2.gather.q.pd"]
    unsafe fn pgatherqpd(
        src: __m128d,
        slice: *const i8,
        offsets: i64x2,
        mask: __m128d,
        scale: i8,
    ) -> __m128d;
    #[link_name = "llvm.x86.avx2.gather.q.pd.256"]
    unsafe fn vpgatherqpd(
        src: __m256d,
        slice: *const i8,
        offsets: i64x4,
        mask: __m256d,
        scale: i8,
    ) -> __m256d;
    #[link_name = "llvm.x86.avx2.gather.d.ps"]
    unsafe fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8)
        -> __m128;
    #[link_name = "llvm.x86.avx2.gather.d.ps.256"]
    unsafe fn vpgatherdps(
        src: __m256,
        slice: *const i8,
        offsets: i32x8,
        mask: __m256,
        scale: i8,
    ) -> __m256;
    #[link_name = "llvm.x86.avx2.gather.q.ps"]
    unsafe fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8)
        -> __m128;
    #[link_name = "llvm.x86.avx2.gather.q.ps.256"]
    unsafe fn vpgatherqps(
        src: __m128,
        slice: *const i8,
        offsets: i64x4,
        mask: __m128,
        scale: i8,
    ) -> __m128;
}
3939
3940#[cfg(test)]
3941mod tests {
3942 use crate::core_arch::assert_eq_const as assert_eq;
3943
3944 use stdarch_test::simd_test;
3945
3946 use crate::core_arch::x86::*;
3947
3948 #[simd_test(enable = "avx2")]
3949 const fn test_mm256_abs_epi32() {
3950 #[rustfmt::skip]
3951 let a = _mm256_setr_epi32(
3952 0, 1, -1, i32::MAX,
3953 i32::MIN, 100, -100, -32,
3954 );
3955 let r = _mm256_abs_epi32(a);
3956 #[rustfmt::skip]
3957 let e = _mm256_setr_epi32(
3958 0, 1, 1, i32::MAX,
3959 i32::MAX.wrapping_add(1), 100, 100, 32,
3960 );
3961 assert_eq_m256i(r, e);
3962 }
3963
3964 #[simd_test(enable = "avx2")]
3965 const fn test_mm256_abs_epi16() {
3966 #[rustfmt::skip]
3967 let a = _mm256_setr_epi16(
3968 0, 1, -1, 2, -2, 3, -3, 4,
3969 -4, 5, -5, i16::MAX, i16::MIN, 100, -100, -32,
3970 );
3971 let r = _mm256_abs_epi16(a);
3972 #[rustfmt::skip]
3973 let e = _mm256_setr_epi16(
3974 0, 1, 1, 2, 2, 3, 3, 4,
3975 4, 5, 5, i16::MAX, i16::MAX.wrapping_add(1), 100, 100, 32,
3976 );
3977 assert_eq_m256i(r, e);
3978 }
3979
3980 #[simd_test(enable = "avx2")]
3981 const fn test_mm256_abs_epi8() {
3982 #[rustfmt::skip]
3983 let a = _mm256_setr_epi8(
3984 0, 1, -1, 2, -2, 3, -3, 4,
3985 -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
3986 0, 1, -1, 2, -2, 3, -3, 4,
3987 -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
3988 );
3989 let r = _mm256_abs_epi8(a);
3990 #[rustfmt::skip]
3991 let e = _mm256_setr_epi8(
3992 0, 1, 1, 2, 2, 3, 3, 4,
3993 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
3994 0, 1, 1, 2, 2, 3, 3, 4,
3995 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
3996 );
3997 assert_eq_m256i(r, e);
3998 }
3999
4000 #[simd_test(enable = "avx2")]
4001 const fn test_mm256_add_epi64() {
4002 let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000);
4003 let b = _mm256_setr_epi64x(-1, 0, 1, 2);
4004 let r = _mm256_add_epi64(a, b);
4005 let e = _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002);
4006 assert_eq_m256i(r, e);
4007 }
4008
4009 #[simd_test(enable = "avx2")]
4010 const fn test_mm256_add_epi32() {
4011 let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6);
4012 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4013 let r = _mm256_add_epi32(a, b);
4014 let e = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
4015 assert_eq_m256i(r, e);
4016 }
4017
4018 #[simd_test(enable = "avx2")]
4019 const fn test_mm256_add_epi16() {
4020 #[rustfmt::skip]
4021 let a = _mm256_setr_epi16(
4022 0, 1, 2, 3, 4, 5, 6, 7,
4023 8, 9, 10, 11, 12, 13, 14, 15,
4024 );
4025 #[rustfmt::skip]
4026 let b = _mm256_setr_epi16(
4027 0, 1, 2, 3, 4, 5, 6, 7,
4028 8, 9, 10, 11, 12, 13, 14, 15,
4029 );
4030 let r = _mm256_add_epi16(a, b);
4031 #[rustfmt::skip]
4032 let e = _mm256_setr_epi16(
4033 0, 2, 4, 6, 8, 10, 12, 14,
4034 16, 18, 20, 22, 24, 26, 28, 30,
4035 );
4036 assert_eq_m256i(r, e);
4037 }
4038
4039 #[simd_test(enable = "avx2")]
4040 const fn test_mm256_add_epi8() {
4041 #[rustfmt::skip]
4042 let a = _mm256_setr_epi8(
4043 0, 1, 2, 3, 4, 5, 6, 7,
4044 8, 9, 10, 11, 12, 13, 14, 15,
4045 16, 17, 18, 19, 20, 21, 22, 23,
4046 24, 25, 26, 27, 28, 29, 30, 31,
4047 );
4048 #[rustfmt::skip]
4049 let b = _mm256_setr_epi8(
4050 0, 1, 2, 3, 4, 5, 6, 7,
4051 8, 9, 10, 11, 12, 13, 14, 15,
4052 16, 17, 18, 19, 20, 21, 22, 23,
4053 24, 25, 26, 27, 28, 29, 30, 31,
4054 );
4055 let r = _mm256_add_epi8(a, b);
4056 #[rustfmt::skip]
4057 let e = _mm256_setr_epi8(
4058 0, 2, 4, 6, 8, 10, 12, 14,
4059 16, 18, 20, 22, 24, 26, 28, 30,
4060 32, 34, 36, 38, 40, 42, 44, 46,
4061 48, 50, 52, 54, 56, 58, 60, 62,
4062 );
4063 assert_eq_m256i(r, e);
4064 }
4065
4066 #[simd_test(enable = "avx2")]
4067 const fn test_mm256_adds_epi8() {
4068 #[rustfmt::skip]
4069 let a = _mm256_setr_epi8(
4070 0, 1, 2, 3, 4, 5, 6, 7,
4071 8, 9, 10, 11, 12, 13, 14, 15,
4072 16, 17, 18, 19, 20, 21, 22, 23,
4073 24, 25, 26, 27, 28, 29, 30, 31,
4074 );
4075 #[rustfmt::skip]
4076 let b = _mm256_setr_epi8(
4077 32, 33, 34, 35, 36, 37, 38, 39,
4078 40, 41, 42, 43, 44, 45, 46, 47,
4079 48, 49, 50, 51, 52, 53, 54, 55,
4080 56, 57, 58, 59, 60, 61, 62, 63,
4081 );
4082 let r = _mm256_adds_epi8(a, b);
4083 #[rustfmt::skip]
4084 let e = _mm256_setr_epi8(
4085 32, 34, 36, 38, 40, 42, 44, 46,
4086 48, 50, 52, 54, 56, 58, 60, 62,
4087 64, 66, 68, 70, 72, 74, 76, 78,
4088 80, 82, 84, 86, 88, 90, 92, 94,
4089 );
4090 assert_eq_m256i(r, e);
4091 }
4092
4093 #[simd_test(enable = "avx2")]
4094 fn test_mm256_adds_epi8_saturate_positive() {
4095 let a = _mm256_set1_epi8(0x7F);
4096 let b = _mm256_set1_epi8(1);
4097 let r = _mm256_adds_epi8(a, b);
4098 assert_eq_m256i(r, a);
4099 }
4100
4101 #[simd_test(enable = "avx2")]
4102 fn test_mm256_adds_epi8_saturate_negative() {
4103 let a = _mm256_set1_epi8(-0x80);
4104 let b = _mm256_set1_epi8(-1);
4105 let r = _mm256_adds_epi8(a, b);
4106 assert_eq_m256i(r, a);
4107 }
4108
4109 #[simd_test(enable = "avx2")]
4110 const fn test_mm256_adds_epi16() {
4111 #[rustfmt::skip]
4112 let a = _mm256_setr_epi16(
4113 0, 1, 2, 3, 4, 5, 6, 7,
4114 8, 9, 10, 11, 12, 13, 14, 15,
4115 );
4116 #[rustfmt::skip]
4117 let b = _mm256_setr_epi16(
4118 32, 33, 34, 35, 36, 37, 38, 39,
4119 40, 41, 42, 43, 44, 45, 46, 47,
4120 );
4121 let r = _mm256_adds_epi16(a, b);
4122 #[rustfmt::skip]
4123 let e = _mm256_setr_epi16(
4124 32, 34, 36, 38, 40, 42, 44, 46,
4125 48, 50, 52, 54, 56, 58, 60, 62,
4126 );
4127
4128 assert_eq_m256i(r, e);
4129 }
4130
4131 #[simd_test(enable = "avx2")]
4132 fn test_mm256_adds_epi16_saturate_positive() {
4133 let a = _mm256_set1_epi16(0x7FFF);
4134 let b = _mm256_set1_epi16(1);
4135 let r = _mm256_adds_epi16(a, b);
4136 assert_eq_m256i(r, a);
4137 }
4138
4139 #[simd_test(enable = "avx2")]
4140 fn test_mm256_adds_epi16_saturate_negative() {
4141 let a = _mm256_set1_epi16(-0x8000);
4142 let b = _mm256_set1_epi16(-1);
4143 let r = _mm256_adds_epi16(a, b);
4144 assert_eq_m256i(r, a);
4145 }
4146
4147 #[simd_test(enable = "avx2")]
4148 const fn test_mm256_adds_epu8() {
4149 #[rustfmt::skip]
4150 let a = _mm256_setr_epi8(
4151 0, 1, 2, 3, 4, 5, 6, 7,
4152 8, 9, 10, 11, 12, 13, 14, 15,
4153 16, 17, 18, 19, 20, 21, 22, 23,
4154 24, 25, 26, 27, 28, 29, 30, 31,
4155 );
4156 #[rustfmt::skip]
4157 let b = _mm256_setr_epi8(
4158 32, 33, 34, 35, 36, 37, 38, 39,
4159 40, 41, 42, 43, 44, 45, 46, 47,
4160 48, 49, 50, 51, 52, 53, 54, 55,
4161 56, 57, 58, 59, 60, 61, 62, 63,
4162 );
4163 let r = _mm256_adds_epu8(a, b);
4164 #[rustfmt::skip]
4165 let e = _mm256_setr_epi8(
4166 32, 34, 36, 38, 40, 42, 44, 46,
4167 48, 50, 52, 54, 56, 58, 60, 62,
4168 64, 66, 68, 70, 72, 74, 76, 78,
4169 80, 82, 84, 86, 88, 90, 92, 94,
4170 );
4171 assert_eq_m256i(r, e);
4172 }
4173
4174 #[simd_test(enable = "avx2")]
4175 fn test_mm256_adds_epu8_saturate() {
4176 let a = _mm256_set1_epi8(!0);
4177 let b = _mm256_set1_epi8(1);
4178 let r = _mm256_adds_epu8(a, b);
4179 assert_eq_m256i(r, a);
4180 }
4181
4182 #[simd_test(enable = "avx2")]
4183 const fn test_mm256_adds_epu16() {
4184 #[rustfmt::skip]
4185 let a = _mm256_setr_epi16(
4186 0, 1, 2, 3, 4, 5, 6, 7,
4187 8, 9, 10, 11, 12, 13, 14, 15,
4188 );
4189 #[rustfmt::skip]
4190 let b = _mm256_setr_epi16(
4191 32, 33, 34, 35, 36, 37, 38, 39,
4192 40, 41, 42, 43, 44, 45, 46, 47,
4193 );
4194 let r = _mm256_adds_epu16(a, b);
4195 #[rustfmt::skip]
4196 let e = _mm256_setr_epi16(
4197 32, 34, 36, 38, 40, 42, 44, 46,
4198 48, 50, 52, 54, 56, 58, 60, 62,
4199 );
4200
4201 assert_eq_m256i(r, e);
4202 }
4203
4204 #[simd_test(enable = "avx2")]
4205 fn test_mm256_adds_epu16_saturate() {
4206 let a = _mm256_set1_epi16(!0);
4207 let b = _mm256_set1_epi16(1);
4208 let r = _mm256_adds_epu16(a, b);
4209 assert_eq_m256i(r, a);
4210 }
4211
4212 #[simd_test(enable = "avx2")]
4213 const fn test_mm256_and_si256() {
4214 let a = _mm256_set1_epi8(5);
4215 let b = _mm256_set1_epi8(3);
4216 let got = _mm256_and_si256(a, b);
4217 assert_eq_m256i(got, _mm256_set1_epi8(1));
4218 }
4219
4220 #[simd_test(enable = "avx2")]
4221 const fn test_mm256_andnot_si256() {
4222 let a = _mm256_set1_epi8(5);
4223 let b = _mm256_set1_epi8(3);
4224 let got = _mm256_andnot_si256(a, b);
4225 assert_eq_m256i(got, _mm256_set1_epi8(2));
4226 }
4227
4228 #[simd_test(enable = "avx2")]
4229 const fn test_mm256_avg_epu8() {
4230 let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9));
4231 let r = _mm256_avg_epu8(a, b);
4232 assert_eq_m256i(r, _mm256_set1_epi8(6));
4233 }
4234
4235 #[simd_test(enable = "avx2")]
4236 const fn test_mm256_avg_epu16() {
4237 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4238 let r = _mm256_avg_epu16(a, b);
4239 assert_eq_m256i(r, _mm256_set1_epi16(6));
4240 }
4241
4242 #[simd_test(enable = "avx2")]
4243 const fn test_mm_blend_epi32() {
4244 let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
4245 let e = _mm_setr_epi32(9, 3, 3, 3);
4246 let r = _mm_blend_epi32::<0x01>(a, b);
4247 assert_eq_m128i(r, e);
4248
4249 let r = _mm_blend_epi32::<0x0E>(b, a);
4250 assert_eq_m128i(r, e);
4251 }
4252
4253 #[simd_test(enable = "avx2")]
4254 const fn test_mm256_blend_epi32() {
4255 let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
4256 let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
4257 let r = _mm256_blend_epi32::<0x01>(a, b);
4258 assert_eq_m256i(r, e);
4259
4260 let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9);
4261 let r = _mm256_blend_epi32::<0x82>(a, b);
4262 assert_eq_m256i(r, e);
4263
4264 let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3);
4265 let r = _mm256_blend_epi32::<0x7C>(a, b);
4266 assert_eq_m256i(r, e);
4267 }
4268
4269 #[simd_test(enable = "avx2")]
4270 const fn test_mm256_blend_epi16() {
4271 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4272 let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
4273 let r = _mm256_blend_epi16::<0x01>(a, b);
4274 assert_eq_m256i(r, e);
4275
4276 let r = _mm256_blend_epi16::<0xFE>(b, a);
4277 assert_eq_m256i(r, e);
4278 }
4279
4280 #[simd_test(enable = "avx2")]
4281 const fn test_mm256_blendv_epi8() {
4282 let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2));
4283 let mask = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1);
4284 let e = _mm256_insert_epi8::<2>(_mm256_set1_epi8(4), 2);
4285 let r = _mm256_blendv_epi8(a, b, mask);
4286 assert_eq_m256i(r, e);
4287 }
4288
4289 #[simd_test(enable = "avx2")]
4290 const fn test_mm_broadcastb_epi8() {
4291 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4292 let res = _mm_broadcastb_epi8(a);
4293 assert_eq_m128i(res, _mm_set1_epi8(0x2a));
4294 }
4295
4296 #[simd_test(enable = "avx2")]
4297 const fn test_mm256_broadcastb_epi8() {
4298 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4299 let res = _mm256_broadcastb_epi8(a);
4300 assert_eq_m256i(res, _mm256_set1_epi8(0x2a));
4301 }
4302
4303 #[simd_test(enable = "avx2")]
4304 const fn test_mm_broadcastd_epi32() {
4305 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4306 let res = _mm_broadcastd_epi32(a);
4307 assert_eq_m128i(res, _mm_set1_epi32(0x2a));
4308 }
4309
4310 #[simd_test(enable = "avx2")]
4311 const fn test_mm256_broadcastd_epi32() {
4312 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4313 let res = _mm256_broadcastd_epi32(a);
4314 assert_eq_m256i(res, _mm256_set1_epi32(0x2a));
4315 }
4316
4317 #[simd_test(enable = "avx2")]
4318 const fn test_mm_broadcastq_epi64() {
4319 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4320 let res = _mm_broadcastq_epi64(a);
4321 assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff));
4322 }
4323
4324 #[simd_test(enable = "avx2")]
4325 const fn test_mm256_broadcastq_epi64() {
4326 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4327 let res = _mm256_broadcastq_epi64(a);
4328 assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff));
4329 }
4330
4331 #[simd_test(enable = "avx2")]
4332 const fn test_mm_broadcastsd_pd() {
4333 let a = _mm_setr_pd(6.88, 3.44);
4334 let res = _mm_broadcastsd_pd(a);
4335 assert_eq_m128d(res, _mm_set1_pd(6.88));
4336 }
4337
4338 #[simd_test(enable = "avx2")]
4339 const fn test_mm256_broadcastsd_pd() {
4340 let a = _mm_setr_pd(6.88, 3.44);
4341 let res = _mm256_broadcastsd_pd(a);
4342 assert_eq_m256d(res, _mm256_set1_pd(6.88f64));
4343 }
4344
4345 #[simd_test(enable = "avx2")]
4346 const fn test_mm_broadcastsi128_si256() {
4347 let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
4348 let res = _mm_broadcastsi128_si256(a);
4349 let retval = _mm256_setr_epi64x(
4350 0x0987654321012334,
4351 0x5678909876543210,
4352 0x0987654321012334,
4353 0x5678909876543210,
4354 );
4355 assert_eq_m256i(res, retval);
4356 }
4357
4358 #[simd_test(enable = "avx2")]
4359 const fn test_mm256_broadcastsi128_si256() {
4360 let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
4361 let res = _mm256_broadcastsi128_si256(a);
4362 let retval = _mm256_setr_epi64x(
4363 0x0987654321012334,
4364 0x5678909876543210,
4365 0x0987654321012334,
4366 0x5678909876543210,
4367 );
4368 assert_eq_m256i(res, retval);
4369 }
4370
4371 #[simd_test(enable = "avx2")]
4372 const fn test_mm_broadcastss_ps() {
4373 let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
4374 let res = _mm_broadcastss_ps(a);
4375 assert_eq_m128(res, _mm_set1_ps(6.88));
4376 }
4377
4378 #[simd_test(enable = "avx2")]
4379 const fn test_mm256_broadcastss_ps() {
4380 let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
4381 let res = _mm256_broadcastss_ps(a);
4382 assert_eq_m256(res, _mm256_set1_ps(6.88));
4383 }
4384
4385 #[simd_test(enable = "avx2")]
4386 const fn test_mm_broadcastw_epi16() {
4387 let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4388 let res = _mm_broadcastw_epi16(a);
4389 assert_eq_m128i(res, _mm_set1_epi16(0x22b));
4390 }
4391
4392 #[simd_test(enable = "avx2")]
4393 const fn test_mm256_broadcastw_epi16() {
4394 let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4395 let res = _mm256_broadcastw_epi16(a);
4396 assert_eq_m256i(res, _mm256_set1_epi16(0x22b));
4397 }
4398
4399 #[simd_test(enable = "avx2")]
4400 const fn test_mm256_cmpeq_epi8() {
4401 #[rustfmt::skip]
4402 let a = _mm256_setr_epi8(
4403 0, 1, 2, 3, 4, 5, 6, 7,
4404 8, 9, 10, 11, 12, 13, 14, 15,
4405 16, 17, 18, 19, 20, 21, 22, 23,
4406 24, 25, 26, 27, 28, 29, 30, 31,
4407 );
4408 #[rustfmt::skip]
4409 let b = _mm256_setr_epi8(
4410 31, 30, 2, 28, 27, 26, 25, 24,
4411 23, 22, 21, 20, 19, 18, 17, 16,
4412 15, 14, 13, 12, 11, 10, 9, 8,
4413 7, 6, 5, 4, 3, 2, 1, 0,
4414 );
4415 let r = _mm256_cmpeq_epi8(a, b);
4416 assert_eq_m256i(r, _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), !0));
4417 }
4418
4419 #[simd_test(enable = "avx2")]
4420 const fn test_mm256_cmpeq_epi16() {
4421 #[rustfmt::skip]
4422 let a = _mm256_setr_epi16(
4423 0, 1, 2, 3, 4, 5, 6, 7,
4424 8, 9, 10, 11, 12, 13, 14, 15,
4425 );
4426 #[rustfmt::skip]
4427 let b = _mm256_setr_epi16(
4428 15, 14, 2, 12, 11, 10, 9, 8,
4429 7, 6, 5, 4, 3, 2, 1, 0,
4430 );
4431 let r = _mm256_cmpeq_epi16(a, b);
4432 assert_eq_m256i(r, _mm256_insert_epi16::<2>(_mm256_set1_epi16(0), !0));
4433 }
4434
4435 #[simd_test(enable = "avx2")]
4436 const fn test_mm256_cmpeq_epi32() {
4437 let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4438 let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0);
4439 let r = _mm256_cmpeq_epi32(a, b);
4440 let e = _mm256_set1_epi32(0);
4441 let e = _mm256_insert_epi32::<2>(e, !0);
4442 assert_eq_m256i(r, e);
4443 }
4444
4445 #[simd_test(enable = "avx2")]
4446 const fn test_mm256_cmpeq_epi64() {
4447 let a = _mm256_setr_epi64x(0, 1, 2, 3);
4448 let b = _mm256_setr_epi64x(3, 2, 2, 0);
4449 let r = _mm256_cmpeq_epi64(a, b);
4450 assert_eq_m256i(r, _mm256_insert_epi64::<2>(_mm256_set1_epi64x(0), !0));
4451 }
4452
4453 #[simd_test(enable = "avx2")]
4454 const fn test_mm256_cmpgt_epi8() {
4455 let a = _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), 5);
4456 let b = _mm256_set1_epi8(0);
4457 let r = _mm256_cmpgt_epi8(a, b);
4458 assert_eq_m256i(r, _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), !0));
4459 }
4460
4461 #[simd_test(enable = "avx2")]
4462 const fn test_mm256_cmpgt_epi16() {
4463 let a = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 5);
4464 let b = _mm256_set1_epi16(0);
4465 let r = _mm256_cmpgt_epi16(a, b);
4466 assert_eq_m256i(r, _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), !0));
4467 }
4468
4469 #[simd_test(enable = "avx2")]
4470 const fn test_mm256_cmpgt_epi32() {
4471 let a = _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), 5);
4472 let b = _mm256_set1_epi32(0);
4473 let r = _mm256_cmpgt_epi32(a, b);
4474 assert_eq_m256i(r, _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), !0));
4475 }
4476
4477 #[simd_test(enable = "avx2")]
4478 const fn test_mm256_cmpgt_epi64() {
4479 let a = _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), 5);
4480 let b = _mm256_set1_epi64x(0);
4481 let r = _mm256_cmpgt_epi64(a, b);
4482 assert_eq_m256i(r, _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), !0));
4483 }
4484
4485 #[simd_test(enable = "avx2")]
4486 const fn test_mm256_cvtepi8_epi16() {
4487 #[rustfmt::skip]
4488 let a = _mm_setr_epi8(
4489 0, 0, -1, 1, -2, 2, -3, 3,
4490 -4, 4, -5, 5, -6, 6, -7, 7,
4491 );
4492 #[rustfmt::skip]
4493 let r = _mm256_setr_epi16(
4494 0, 0, -1, 1, -2, 2, -3, 3,
4495 -4, 4, -5, 5, -6, 6, -7, 7,
4496 );
4497 assert_eq_m256i(r, _mm256_cvtepi8_epi16(a));
4498 }
4499
4500 #[simd_test(enable = "avx2")]
4501 const fn test_mm256_cvtepi8_epi32() {
4502 #[rustfmt::skip]
4503 let a = _mm_setr_epi8(
4504 0, 0, -1, 1, -2, 2, -3, 3,
4505 -4, 4, -5, 5, -6, 6, -7, 7,
4506 );
4507 let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4508 assert_eq_m256i(r, _mm256_cvtepi8_epi32(a));
4509 }
4510
4511 #[simd_test(enable = "avx2")]
4512 const fn test_mm256_cvtepi8_epi64() {
4513 #[rustfmt::skip]
4514 let a = _mm_setr_epi8(
4515 0, 0, -1, 1, -2, 2, -3, 3,
4516 -4, 4, -5, 5, -6, 6, -7, 7,
4517 );
4518 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4519 assert_eq_m256i(r, _mm256_cvtepi8_epi64(a));
4520 }
4521
4522 #[simd_test(enable = "avx2")]
4523 const fn test_mm256_cvtepi16_epi32() {
4524 let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4525 let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4526 assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
4527 }
4528
4529 #[simd_test(enable = "avx2")]
4530 const fn test_mm256_cvtepi16_epi64() {
4531 let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4532 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4533 assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
4534 }
4535
4536 #[simd_test(enable = "avx2")]
4537 const fn test_mm256_cvtepi32_epi64() {
4538 let a = _mm_setr_epi32(0, 0, -1, 1);
4539 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4540 assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
4541 }
4542
4543 #[simd_test(enable = "avx2")]
4544 const fn test_mm256_cvtepu16_epi32() {
4545 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4546 let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4547 assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
4548 }
4549
4550 #[simd_test(enable = "avx2")]
4551 const fn test_mm256_cvtepu16_epi64() {
4552 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4553 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4554 assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
4555 }
4556
4557 #[simd_test(enable = "avx2")]
4558 const fn test_mm256_cvtepu32_epi64() {
4559 let a = _mm_setr_epi32(0, 1, 2, 3);
4560 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4561 assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
4562 }
4563
4564 #[simd_test(enable = "avx2")]
4565 const fn test_mm256_cvtepu8_epi16() {
4566 #[rustfmt::skip]
4567 let a = _mm_setr_epi8(
4568 0, 1, 2, 3, 4, 5, 6, 7,
4569 8, 9, 10, 11, 12, 13, 14, 15,
4570 );
4571 #[rustfmt::skip]
4572 let r = _mm256_setr_epi16(
4573 0, 1, 2, 3, 4, 5, 6, 7,
4574 8, 9, 10, 11, 12, 13, 14, 15,
4575 );
4576 assert_eq_m256i(r, _mm256_cvtepu8_epi16(a));
4577 }
4578
4579 #[simd_test(enable = "avx2")]
4580 const fn test_mm256_cvtepu8_epi32() {
4581 #[rustfmt::skip]
4582 let a = _mm_setr_epi8(
4583 0, 1, 2, 3, 4, 5, 6, 7,
4584 8, 9, 10, 11, 12, 13, 14, 15,
4585 );
4586 let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4587 assert_eq_m256i(r, _mm256_cvtepu8_epi32(a));
4588 }
4589
4590 #[simd_test(enable = "avx2")]
4591 const fn test_mm256_cvtepu8_epi64() {
4592 #[rustfmt::skip]
4593 let a = _mm_setr_epi8(
4594 0, 1, 2, 3, 4, 5, 6, 7,
4595 8, 9, 10, 11, 12, 13, 14, 15,
4596 );
4597 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4598 assert_eq_m256i(r, _mm256_cvtepu8_epi64(a));
4599 }
4600
4601 #[simd_test(enable = "avx2")]
4602 const fn test_mm256_extracti128_si256() {
4603 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4604 let r = _mm256_extracti128_si256::<1>(a);
4605 let e = _mm_setr_epi64x(3, 4);
4606 assert_eq_m128i(r, e);
4607 }
4608
4609 #[simd_test(enable = "avx2")]
4610 const fn test_mm256_hadd_epi16() {
4611 let a = _mm256_set1_epi16(2);
4612 let b = _mm256_set1_epi16(4);
4613 let r = _mm256_hadd_epi16(a, b);
4614 let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
4615 assert_eq_m256i(r, e);
4616 }
4617
4618 #[simd_test(enable = "avx2")]
4619 const fn test_mm256_hadd_epi32() {
4620 let a = _mm256_set1_epi32(2);
4621 let b = _mm256_set1_epi32(4);
4622 let r = _mm256_hadd_epi32(a, b);
4623 let e = _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8);
4624 assert_eq_m256i(r, e);
4625 }
4626
4627 #[simd_test(enable = "avx2")]
4628 fn test_mm256_hadds_epi16() {
4629 let a = _mm256_set1_epi16(2);
4630 let a = _mm256_insert_epi16::<0>(a, 0x7fff);
4631 let a = _mm256_insert_epi16::<1>(a, 1);
4632 let b = _mm256_set1_epi16(4);
4633 let r = _mm256_hadds_epi16(a, b);
4634 #[rustfmt::skip]
4635 let e = _mm256_setr_epi16(
4636 0x7FFF, 4, 4, 4, 8, 8, 8, 8,
4637 4, 4, 4, 4, 8, 8, 8, 8,
4638 );
4639 assert_eq_m256i(r, e);
4640 }
4641
4642 #[simd_test(enable = "avx2")]
4643 const fn test_mm256_hsub_epi16() {
4644 let a = _mm256_set1_epi16(2);
4645 let b = _mm256_set1_epi16(4);
4646 let r = _mm256_hsub_epi16(a, b);
4647 let e = _mm256_set1_epi16(0);
4648 assert_eq_m256i(r, e);
4649 }
4650
4651 #[simd_test(enable = "avx2")]
4652 const fn test_mm256_hsub_epi32() {
4653 let a = _mm256_set1_epi32(2);
4654 let b = _mm256_set1_epi32(4);
4655 let r = _mm256_hsub_epi32(a, b);
4656 let e = _mm256_set1_epi32(0);
4657 assert_eq_m256i(r, e);
4658 }
4659
4660 #[simd_test(enable = "avx2")]
4661 fn test_mm256_hsubs_epi16() {
4662 let a = _mm256_set1_epi16(2);
4663 let a = _mm256_insert_epi16::<0>(a, 0x7fff);
4664 let a = _mm256_insert_epi16::<1>(a, -1);
4665 let b = _mm256_set1_epi16(4);
4666 let r = _mm256_hsubs_epi16(a, b);
4667 let e = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 0x7FFF);
4668 assert_eq_m256i(r, e);
4669 }
4670
4671 #[simd_test(enable = "avx2")]
4672 fn test_mm256_madd_epi16() {
4673 let a = _mm256_set1_epi16(2);
4674 let b = _mm256_set1_epi16(4);
4675 let r = _mm256_madd_epi16(a, b);
4676 let e = _mm256_set1_epi32(16);
4677 assert_eq_m256i(r, e);
4678 }
4679
4680 #[target_feature(enable = "avx2")]
4681 #[cfg_attr(test, assert_instr(vpmaddwd))]
4682 unsafe fn test_mm256_madd_epi16_mul_one(v: __m256i) -> __m256i {
4683 // This is a trick used in the adler32 algorithm to get a widening addition. The
4684 // multiplication by 1 is trivial, but it must not be optimized out, because then the
4685 // vpmaddwd instruction would no longer be selected; assert_instr verifies that it still is.
4686 let one_v = _mm256_set1_epi16(1);
4687 _mm256_madd_epi16(v, one_v)
4688 }
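// A minimal runnable sketch of the widening-addition trick above: multiplying by one with
// `_mm256_madd_epi16` sums each adjacent pair of 16-bit lanes into a 32-bit lane, so pairs
// that would wrap at 16 bits do not. The test name and input values here are illustrative,
// not taken from the adler32 code.
#[simd_test(enable = "avx2")]
fn test_mm256_madd_epi16_mul_one_values() {
    #[rustfmt::skip]
    let v = _mm256_setr_epi16(
        1, 2, 3, 4, 5, 6, 7, 8,
        -1, -2, -3, -4, 30000, 30000, i16::MAX, i16::MAX,
    );
    let r = _mm256_madd_epi16(v, _mm256_set1_epi16(1));
    // Each 32-bit result is the sum of one 16-bit pair.
    let e = _mm256_setr_epi32(3, 7, 11, 15, -3, -7, 60000, 2 * i16::MAX as i32);
    assert_eq_m256i(r, e);
}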
4689
4690 #[target_feature(enable = "avx2")]
4691 #[cfg_attr(test, assert_instr(vpmaddwd))]
4692 unsafe fn test_mm256_madd_epi16_shl(v: __m256i) -> __m256i {
4693 // This is a trick used in the base64 algorithm to get a widening addition. Instead of a
4694 // multiplication, a vector shl is used; with LLVM 22 that breaks the pattern recognition
4695 // that would otherwise optimize this pattern to vpmaddwd automatically.
4696 let shift_value = _mm256_set1_epi32(12i32);
4697 _mm256_madd_epi16(v, shift_value)
4698 }
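// A minimal sketch of why a shift can stand in for the multiplication here: if each 32-bit
// lane of the multiplier holds the 16-bit pair (1 << k, 0), `_mm256_madd_epi16` computes
// `lo * 2^k + hi * 0`, i.e. a widening left shift of the even 16-bit lanes. The test name
// and values below are illustrative only.
#[simd_test(enable = "avx2")]
fn test_mm256_madd_epi16_shl_values() {
    let v = _mm256_set1_epi16(3);
    // Each 32-bit lane of the multiplier holds the 16-bit pair (16, 0).
    let m = _mm256_set1_epi32(1 << 4);
    let r = _mm256_madd_epi16(v, m);
    assert_eq_m256i(r, _mm256_set1_epi32(3 << 4));
}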
4699
4700 #[simd_test(enable = "avx2")]
4701 const fn test_mm256_inserti128_si256() {
4702 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4703 let b = _mm_setr_epi64x(7, 8);
4704 let r = _mm256_inserti128_si256::<1>(a, b);
4705 let e = _mm256_setr_epi64x(1, 2, 7, 8);
4706 assert_eq_m256i(r, e);
4707 }
4708
4709 #[simd_test(enable = "avx2")]
4710 fn test_mm256_maddubs_epi16() {
4711 let a = _mm256_set1_epi8(2);
4712 let b = _mm256_set1_epi8(4);
4713 let r = _mm256_maddubs_epi16(a, b);
4714 let e = _mm256_set1_epi16(16);
4715 assert_eq_m256i(r, e);
4716 }
4717
4718 #[simd_test(enable = "avx2")]
4719 const fn test_mm_maskload_epi32() {
4720 let nums = [1, 2, 3, 4];
4721 let a = &nums as *const i32;
4722 let mask = _mm_setr_epi32(-1, 0, 0, -1);
4723 let r = unsafe { _mm_maskload_epi32(a, mask) };
4724 let e = _mm_setr_epi32(1, 0, 0, 4);
4725 assert_eq_m128i(r, e);
4726 }
4727
4728 #[simd_test(enable = "avx2")]
4729 const fn test_mm256_maskload_epi32() {
4730 let nums = [1, 2, 3, 4, 5, 6, 7, 8];
4731 let a = &nums as *const i32;
4732 let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4733 let r = unsafe { _mm256_maskload_epi32(a, mask) };
4734 let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
4735 assert_eq_m256i(r, e);
4736 }
4737
4738 #[simd_test(enable = "avx2")]
4739 const fn test_mm_maskload_epi64() {
4740 let nums = [1_i64, 2_i64];
4741 let a = &nums as *const i64;
4742 let mask = _mm_setr_epi64x(0, -1);
4743 let r = unsafe { _mm_maskload_epi64(a, mask) };
4744 let e = _mm_setr_epi64x(0, 2);
4745 assert_eq_m128i(r, e);
4746 }
4747
4748 #[simd_test(enable = "avx2")]
4749 const fn test_mm256_maskload_epi64() {
4750 let nums = [1_i64, 2_i64, 3_i64, 4_i64];
4751 let a = &nums as *const i64;
4752 let mask = _mm256_setr_epi64x(0, -1, -1, 0);
4753 let r = unsafe { _mm256_maskload_epi64(a, mask) };
4754 let e = _mm256_setr_epi64x(0, 2, 3, 0);
4755 assert_eq_m256i(r, e);
4756 }
4757
4758 #[simd_test(enable = "avx2")]
4759 const fn test_mm_maskstore_epi32() {
4760 let a = _mm_setr_epi32(1, 2, 3, 4);
4761 let mut arr = [-1, -1, -1, -1];
4762 let mask = _mm_setr_epi32(-1, 0, 0, -1);
4763 unsafe {
4764 _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4765 }
4766 let e = [1, -1, -1, 4];
4767 assert_eq!(arr, e);
4768 }
4769
4770 #[simd_test(enable = "avx2")]
4771 const fn test_mm256_maskstore_epi32() {
4772 let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
4773 let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
4774 let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4775 unsafe {
4776 _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4777 }
4778 let e = [1, -1, -1, 42, -1, 6, 7, -1];
4779 assert_eq!(arr, e);
4780 }
4781
4782 #[simd_test(enable = "avx2")]
4783 const fn test_mm_maskstore_epi64() {
4784 let a = _mm_setr_epi64x(1_i64, 2_i64);
4785 let mut arr = [-1_i64, -1_i64];
4786 let mask = _mm_setr_epi64x(0, -1);
4787 unsafe {
4788 _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4789 }
4790 let e = [-1, 2];
4791 assert_eq!(arr, e);
4792 }
4793
4794 #[simd_test(enable = "avx2")]
4795 const fn test_mm256_maskstore_epi64() {
4796 let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
4797 let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
4798 let mask = _mm256_setr_epi64x(0, -1, -1, 0);
4799 unsafe {
4800 _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4801 }
4802 let e = [-1, 2, 3, -1];
4803 assert_eq!(arr, e);
4804 }
4805
4806 #[simd_test(enable = "avx2")]
4807 const fn test_mm256_max_epi16() {
4808 let a = _mm256_set1_epi16(2);
4809 let b = _mm256_set1_epi16(4);
4810 let r = _mm256_max_epi16(a, b);
4811 assert_eq_m256i(r, b);
4812 }
4813
4814 #[simd_test(enable = "avx2")]
4815 const fn test_mm256_max_epi32() {
4816 let a = _mm256_set1_epi32(2);
4817 let b = _mm256_set1_epi32(4);
4818 let r = _mm256_max_epi32(a, b);
4819 assert_eq_m256i(r, b);
4820 }
4821
4822 #[simd_test(enable = "avx2")]
4823 const fn test_mm256_max_epi8() {
4824 let a = _mm256_set1_epi8(2);
4825 let b = _mm256_set1_epi8(4);
4826 let r = _mm256_max_epi8(a, b);
4827 assert_eq_m256i(r, b);
4828 }
4829
4830 #[simd_test(enable = "avx2")]
4831 const fn test_mm256_max_epu16() {
4832 let a = _mm256_set1_epi16(2);
4833 let b = _mm256_set1_epi16(4);
4834 let r = _mm256_max_epu16(a, b);
4835 assert_eq_m256i(r, b);
4836 }
4837
4838 #[simd_test(enable = "avx2")]
4839 const fn test_mm256_max_epu32() {
4840 let a = _mm256_set1_epi32(2);
4841 let b = _mm256_set1_epi32(4);
4842 let r = _mm256_max_epu32(a, b);
4843 assert_eq_m256i(r, b);
4844 }
4845
4846 #[simd_test(enable = "avx2")]
4847 const fn test_mm256_max_epu8() {
4848 let a = _mm256_set1_epi8(2);
4849 let b = _mm256_set1_epi8(4);
4850 let r = _mm256_max_epu8(a, b);
4851 assert_eq_m256i(r, b);
4852 }
4853
4854 #[simd_test(enable = "avx2")]
4855 const fn test_mm256_min_epi16() {
4856 let a = _mm256_set1_epi16(2);
4857 let b = _mm256_set1_epi16(4);
4858 let r = _mm256_min_epi16(a, b);
4859 assert_eq_m256i(r, a);
4860 }
4861
4862 #[simd_test(enable = "avx2")]
4863 const fn test_mm256_min_epi32() {
4864 let a = _mm256_set1_epi32(2);
4865 let b = _mm256_set1_epi32(4);
4866 let r = _mm256_min_epi32(a, b);
4867 assert_eq_m256i(r, a);
4868 }
4869
4870 #[simd_test(enable = "avx2")]
4871 const fn test_mm256_min_epi8() {
4872 let a = _mm256_set1_epi8(2);
4873 let b = _mm256_set1_epi8(4);
4874 let r = _mm256_min_epi8(a, b);
4875 assert_eq_m256i(r, a);
4876 }
4877
4878 #[simd_test(enable = "avx2")]
4879 const fn test_mm256_min_epu16() {
4880 let a = _mm256_set1_epi16(2);
4881 let b = _mm256_set1_epi16(4);
4882 let r = _mm256_min_epu16(a, b);
4883 assert_eq_m256i(r, a);
4884 }
4885
4886 #[simd_test(enable = "avx2")]
4887 const fn test_mm256_min_epu32() {
4888 let a = _mm256_set1_epi32(2);
4889 let b = _mm256_set1_epi32(4);
4890 let r = _mm256_min_epu32(a, b);
4891 assert_eq_m256i(r, a);
4892 }
4893
4894 #[simd_test(enable = "avx2")]
4895 const fn test_mm256_min_epu8() {
4896 let a = _mm256_set1_epi8(2);
4897 let b = _mm256_set1_epi8(4);
4898 let r = _mm256_min_epu8(a, b);
4899 assert_eq_m256i(r, a);
4900 }
4901
4902 #[simd_test(enable = "avx2")]
4903 const fn test_mm256_movemask_epi8() {
4904 let a = _mm256_set1_epi8(-1);
4905 let r = _mm256_movemask_epi8(a);
4906 let e = -1;
4907 assert_eq!(r, e);
4908 }
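// A small illustrative check (values chosen arbitrarily): `_mm256_movemask_epi8` maps the
// sign bit of byte lane `i` to bit `i` of the result, so a single negative byte at lane 2
// sets only bit 2.
#[simd_test(enable = "avx2")]
fn test_mm256_movemask_epi8_single_lane() {
    let a = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1);
    let r = _mm256_movemask_epi8(a);
    assert_eq!(r, 1 << 2);
}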
4909
4910 #[simd_test(enable = "avx2")]
4911 fn test_mm256_mpsadbw_epu8() {
4912 let a = _mm256_set1_epi8(2);
4913 let b = _mm256_set1_epi8(4);
4914 let r = _mm256_mpsadbw_epu8::<0>(a, b);
4915 let e = _mm256_set1_epi16(8);
4916 assert_eq_m256i(r, e);
4917 }
4918
4919 #[simd_test(enable = "avx2")]
4920 const fn test_mm256_mul_epi32() {
4921 let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
4922 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4923 let r = _mm256_mul_epi32(a, b);
4924 let e = _mm256_setr_epi64x(0, 0, 10, 14);
4925 assert_eq_m256i(r, e);
4926 }
4927
4928 #[simd_test(enable = "avx2")]
4929 const fn test_mm256_mul_epu32() {
4930 let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
4931 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4932 let r = _mm256_mul_epu32(a, b);
4933 let e = _mm256_setr_epi64x(0, 0, 10, 14);
4934 assert_eq_m256i(r, e);
4935 }
4936
4937 #[simd_test(enable = "avx2")]
4938 const fn test_mm256_mulhi_epi16() {
4939 let a = _mm256_set1_epi16(6535);
4940 let b = _mm256_set1_epi16(6535);
4941 let r = _mm256_mulhi_epi16(a, b);
4942 let e = _mm256_set1_epi16(651);
4943 assert_eq_m256i(r, e);
4944 }
4945
4946 #[simd_test(enable = "avx2")]
4947 const fn test_mm256_mulhi_epu16() {
4948 let a = _mm256_set1_epi16(6535);
4949 let b = _mm256_set1_epi16(6535);
4950 let r = _mm256_mulhi_epu16(a, b);
4951 let e = _mm256_set1_epi16(651);
4952 assert_eq_m256i(r, e);
4953 }
4954
4955 #[simd_test(enable = "avx2")]
4956 const fn test_mm256_mullo_epi16() {
4957 let a = _mm256_set1_epi16(2);
4958 let b = _mm256_set1_epi16(4);
4959 let r = _mm256_mullo_epi16(a, b);
4960 let e = _mm256_set1_epi16(8);
4961 assert_eq_m256i(r, e);
4962 }
4963
4964 #[simd_test(enable = "avx2")]
4965 const fn test_mm256_mullo_epi32() {
4966 let a = _mm256_set1_epi32(2);
4967 let b = _mm256_set1_epi32(4);
4968 let r = _mm256_mullo_epi32(a, b);
4969 let e = _mm256_set1_epi32(8);
4970 assert_eq_m256i(r, e);
4971 }
4972
4973 #[simd_test(enable = "avx2")]
4974 fn test_mm256_mulhrs_epi16() {
4975 let a = _mm256_set1_epi16(4096);
4976 let b = _mm256_set1_epi16(8192);
4977 let r = _mm256_mulhrs_epi16(a, b);
4978 let e = _mm256_set1_epi16(1024);
4979 assert_eq_m256i(r, e);
4980 }
4981
4982 #[simd_test(enable = "avx2")]
4983 const fn test_mm256_or_si256() {
4984 let a = _mm256_set1_epi8(-1);
4985 let b = _mm256_set1_epi8(0);
4986 let r = _mm256_or_si256(a, b);
4987 assert_eq_m256i(r, a);
4988 }
4989
4990 #[simd_test(enable = "avx2")]
4991 fn test_mm256_packs_epi16() {
4992 let a = _mm256_set1_epi16(2);
4993 let b = _mm256_set1_epi16(4);
4994 let r = _mm256_packs_epi16(a, b);
4995 #[rustfmt::skip]
4996 let e = _mm256_setr_epi8(
4997 2, 2, 2, 2, 2, 2, 2, 2,
4998 4, 4, 4, 4, 4, 4, 4, 4,
4999 2, 2, 2, 2, 2, 2, 2, 2,
5000 4, 4, 4, 4, 4, 4, 4, 4,
5001 );
5002
5003 assert_eq_m256i(r, e);
5004 }
5005
5006 #[simd_test(enable = "avx2")]
5007 fn test_mm256_packs_epi32() {
5008 let a = _mm256_set1_epi32(2);
5009 let b = _mm256_set1_epi32(4);
5010 let r = _mm256_packs_epi32(a, b);
5011 let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
5012
5013 assert_eq_m256i(r, e);
5014 }
5015
5016 #[simd_test(enable = "avx2")]
5017 fn test_mm256_packus_epi16() {
5018 let a = _mm256_set1_epi16(2);
5019 let b = _mm256_set1_epi16(4);
5020 let r = _mm256_packus_epi16(a, b);
5021 #[rustfmt::skip]
5022 let e = _mm256_setr_epi8(
5023 2, 2, 2, 2, 2, 2, 2, 2,
5024 4, 4, 4, 4, 4, 4, 4, 4,
5025 2, 2, 2, 2, 2, 2, 2, 2,
5026 4, 4, 4, 4, 4, 4, 4, 4,
5027 );
5028
5029 assert_eq_m256i(r, e);
5030 }
5031
5032 #[simd_test(enable = "avx2")]
5033 fn test_mm256_packus_epi32() {
5034 let a = _mm256_set1_epi32(2);
5035 let b = _mm256_set1_epi32(4);
5036 let r = _mm256_packus_epi32(a, b);
5037 let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
5038
5039 assert_eq_m256i(r, e);
5040 }
5041
5042 #[simd_test(enable = "avx2")]
5043 fn test_mm256_sad_epu8() {
5044 let a = _mm256_set1_epi8(2);
5045 let b = _mm256_set1_epi8(4);
5046 let r = _mm256_sad_epu8(a, b);
5047 let e = _mm256_set1_epi64x(16);
5048 assert_eq_m256i(r, e);
5049 }
5050
5051 #[simd_test(enable = "avx2")]
5052 const fn test_mm256_shufflehi_epi16() {
5053 #[rustfmt::skip]
5054 let a = _mm256_setr_epi16(
5055 0, 1, 2, 3, 11, 22, 33, 44,
5056 4, 5, 6, 7, 55, 66, 77, 88,
5057 );
5058 #[rustfmt::skip]
5059 let e = _mm256_setr_epi16(
5060 0, 1, 2, 3, 44, 22, 22, 11,
5061 4, 5, 6, 7, 88, 66, 66, 55,
5062 );
5063 let r = _mm256_shufflehi_epi16::<0b00_01_01_11>(a);
5064 assert_eq_m256i(r, e);
5065 }
5066
5067 #[simd_test(enable = "avx2")]
5068 const fn test_mm256_shufflelo_epi16() {
5069 #[rustfmt::skip]
5070 let a = _mm256_setr_epi16(
5071 11, 22, 33, 44, 0, 1, 2, 3,
5072 55, 66, 77, 88, 4, 5, 6, 7,
5073 );
5074 #[rustfmt::skip]
5075 let e = _mm256_setr_epi16(
5076 44, 22, 22, 11, 0, 1, 2, 3,
5077 88, 66, 66, 55, 4, 5, 6, 7,
5078 );
5079 let r = _mm256_shufflelo_epi16::<0b00_01_01_11>(a);
5080 assert_eq_m256i(r, e);
5081 }
5082
5083 #[simd_test(enable = "avx2")]
5084 fn test_mm256_sign_epi16() {
5085 let a = _mm256_set1_epi16(2);
5086 let b = _mm256_set1_epi16(-1);
5087 let r = _mm256_sign_epi16(a, b);
5088 let e = _mm256_set1_epi16(-2);
5089 assert_eq_m256i(r, e);
5090 }
5091
5092 #[simd_test(enable = "avx2")]
5093 fn test_mm256_sign_epi32() {
5094 let a = _mm256_set1_epi32(2);
5095 let b = _mm256_set1_epi32(-1);
5096 let r = _mm256_sign_epi32(a, b);
5097 let e = _mm256_set1_epi32(-2);
5098 assert_eq_m256i(r, e);
5099 }
5100
5101 #[simd_test(enable = "avx2")]
5102 fn test_mm256_sign_epi8() {
5103 let a = _mm256_set1_epi8(2);
5104 let b = _mm256_set1_epi8(-1);
5105 let r = _mm256_sign_epi8(a, b);
5106 let e = _mm256_set1_epi8(-2);
5107 assert_eq_m256i(r, e);
5108 }
5109
5110 #[simd_test(enable = "avx2")]
5111 fn test_mm256_sll_epi16() {
5112 let a = _mm256_set1_epi16(0xFF);
5113 let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
5114 let r = _mm256_sll_epi16(a, b);
5115 assert_eq_m256i(r, _mm256_set1_epi16(0xFF0));
5116 }
5117
5118 #[simd_test(enable = "avx2")]
5119 fn test_mm256_sll_epi32() {
5120 let a = _mm256_set1_epi32(0xFFFF);
5121 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
5122 let r = _mm256_sll_epi32(a, b);
5123 assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0));
5124 }
5125
5126 #[simd_test(enable = "avx2")]
5127 fn test_mm256_sll_epi64() {
5128 let a = _mm256_set1_epi64x(0xFFFFFFFF);
5129 let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4);
5130 let r = _mm256_sll_epi64(a, b);
5131 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0));
5132 }
5133
5134 #[simd_test(enable = "avx2")]
5135 const fn test_mm256_slli_epi16() {
5136 assert_eq_m256i(
5137 _mm256_slli_epi16::<4>(_mm256_set1_epi16(0xFF)),
5138 _mm256_set1_epi16(0xFF0),
5139 );
5140 }
5141
5142 #[simd_test(enable = "avx2")]
5143 const fn test_mm256_slli_epi32() {
5144 assert_eq_m256i(
5145 _mm256_slli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
5146 _mm256_set1_epi32(0xFFFF0),
5147 );
5148 }
5149
5150 #[simd_test(enable = "avx2")]
5151 const fn test_mm256_slli_epi64() {
5152 assert_eq_m256i(
5153 _mm256_slli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
5154 _mm256_set1_epi64x(0xFFFFFFFF0),
5155 );
5156 }
5157
5158 #[simd_test(enable = "avx2")]
5159 const fn test_mm256_slli_si256() {
5160 let a = _mm256_set1_epi64x(0xFFFFFFFF);
5161 let r = _mm256_slli_si256::<3>(a);
5162 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
5163 }
5164
5165 #[simd_test(enable = "avx2")]
5166 const fn test_mm_sllv_epi32() {
5167 let a = _mm_set1_epi32(2);
5168 let b = _mm_set1_epi32(1);
5169 let r = _mm_sllv_epi32(a, b);
5170 let e = _mm_set1_epi32(4);
5171 assert_eq_m128i(r, e);
5172 }
5173
5174 #[simd_test(enable = "avx2")]
5175 const fn test_mm256_sllv_epi32() {
5176 let a = _mm256_set1_epi32(2);
5177 let b = _mm256_set1_epi32(1);
5178 let r = _mm256_sllv_epi32(a, b);
5179 let e = _mm256_set1_epi32(4);
5180 assert_eq_m256i(r, e);
5181 }
5182
5183 #[simd_test(enable = "avx2")]
5184 const fn test_mm_sllv_epi64() {
5185 let a = _mm_set1_epi64x(2);
5186 let b = _mm_set1_epi64x(1);
5187 let r = _mm_sllv_epi64(a, b);
5188 let e = _mm_set1_epi64x(4);
5189 assert_eq_m128i(r, e);
5190 }
5191
5192 #[simd_test(enable = "avx2")]
5193 const fn test_mm256_sllv_epi64() {
5194 let a = _mm256_set1_epi64x(2);
5195 let b = _mm256_set1_epi64x(1);
5196 let r = _mm256_sllv_epi64(a, b);
5197 let e = _mm256_set1_epi64x(4);
5198 assert_eq_m256i(r, e);
5199 }
5200
5201 #[simd_test(enable = "avx2")]
5202 fn test_mm256_sra_epi16() {
5203 let a = _mm256_set1_epi16(-1);
5204 let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
5205 let r = _mm256_sra_epi16(a, b);
5206 assert_eq_m256i(r, _mm256_set1_epi16(-1));
5207 }
5208
5209 #[simd_test(enable = "avx2")]
5210 fn test_mm256_sra_epi32() {
5211 let a = _mm256_set1_epi32(-1);
5212 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1);
5213 let r = _mm256_sra_epi32(a, b);
5214 assert_eq_m256i(r, _mm256_set1_epi32(-1));
5215 }
5216
5217 #[simd_test(enable = "avx2")]
5218 const fn test_mm256_srai_epi16() {
5219 assert_eq_m256i(
5220 _mm256_srai_epi16::<1>(_mm256_set1_epi16(-1)),
5221 _mm256_set1_epi16(-1),
5222 );
5223 }
5224
5225 #[simd_test(enable = "avx2")]
5226 const fn test_mm256_srai_epi32() {
5227 assert_eq_m256i(
5228 _mm256_srai_epi32::<1>(_mm256_set1_epi32(-1)),
5229 _mm256_set1_epi32(-1),
5230 );
5231 }
5232
5233 #[simd_test(enable = "avx2")]
5234 const fn test_mm_srav_epi32() {
5235 let a = _mm_set1_epi32(4);
5236 let count = _mm_set1_epi32(1);
5237 let r = _mm_srav_epi32(a, count);
5238 let e = _mm_set1_epi32(2);
5239 assert_eq_m128i(r, e);
5240 }
5241
5242 #[simd_test(enable = "avx2")]
5243 const fn test_mm256_srav_epi32() {
5244 let a = _mm256_set1_epi32(4);
5245 let count = _mm256_set1_epi32(1);
5246 let r = _mm256_srav_epi32(a, count);
5247 let e = _mm256_set1_epi32(2);
5248 assert_eq_m256i(r, e);
5249 }
5250
5251 #[simd_test(enable = "avx2")]
5252 const fn test_mm256_srli_si256() {
5253 #[rustfmt::skip]
5254 let a = _mm256_setr_epi8(
5255 1, 2, 3, 4, 5, 6, 7, 8,
5256 9, 10, 11, 12, 13, 14, 15, 16,
5257 17, 18, 19, 20, 21, 22, 23, 24,
5258 25, 26, 27, 28, 29, 30, 31, 32,
5259 );
5260 let r = _mm256_srli_si256::<3>(a);
5261 #[rustfmt::skip]
5262 let e = _mm256_setr_epi8(
5263 4, 5, 6, 7, 8, 9, 10, 11,
5264 12, 13, 14, 15, 16, 0, 0, 0,
5265 20, 21, 22, 23, 24, 25, 26, 27,
5266 28, 29, 30, 31, 32, 0, 0, 0,
5267 );
5268 assert_eq_m256i(r, e);
5269 }
5270
5271 #[simd_test(enable = "avx2")]
5272 fn test_mm256_srl_epi16() {
5273 let a = _mm256_set1_epi16(0xFF);
5274 let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
5275 let r = _mm256_srl_epi16(a, b);
5276 assert_eq_m256i(r, _mm256_set1_epi16(0xF));
5277 }
5278
5279 #[simd_test(enable = "avx2")]
5280 fn test_mm256_srl_epi32() {
5281 let a = _mm256_set1_epi32(0xFFFF);
5282 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
5283 let r = _mm256_srl_epi32(a, b);
5284 assert_eq_m256i(r, _mm256_set1_epi32(0xFFF));
5285 }
5286
5287 #[simd_test(enable = "avx2")]
5288 fn test_mm256_srl_epi64() {
5289 let a = _mm256_set1_epi64x(0xFFFFFFFF);
5290 let b = _mm_setr_epi64x(4, 0);
5291 let r = _mm256_srl_epi64(a, b);
5292 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFF));
5293 }
5294
5295 #[simd_test(enable = "avx2")]
5296 const fn test_mm256_srli_epi16() {
5297 assert_eq_m256i(
5298 _mm256_srli_epi16::<4>(_mm256_set1_epi16(0xFF)),
5299 _mm256_set1_epi16(0xF),
5300 );
5301 }
5302
5303 #[simd_test(enable = "avx2")]
5304 const fn test_mm256_srli_epi32() {
5305 assert_eq_m256i(
5306 _mm256_srli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
5307 _mm256_set1_epi32(0xFFF),
5308 );
5309 }
5310
5311 #[simd_test(enable = "avx2")]
5312 const fn test_mm256_srli_epi64() {
5313 assert_eq_m256i(
5314 _mm256_srli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
5315 _mm256_set1_epi64x(0xFFFFFFF),
5316 );
5317 }
5318
5319 #[simd_test(enable = "avx2")]
5320 const fn test_mm_srlv_epi32() {
5321 let a = _mm_set1_epi32(2);
5322 let count = _mm_set1_epi32(1);
5323 let r = _mm_srlv_epi32(a, count);
5324 let e = _mm_set1_epi32(1);
5325 assert_eq_m128i(r, e);
5326 }
5327
5328 #[simd_test(enable = "avx2")]
5329 const fn test_mm256_srlv_epi32() {
5330 let a = _mm256_set1_epi32(2);
5331 let count = _mm256_set1_epi32(1);
5332 let r = _mm256_srlv_epi32(a, count);
5333 let e = _mm256_set1_epi32(1);
5334 assert_eq_m256i(r, e);
5335 }
5336
5337 #[simd_test(enable = "avx2")]
5338 const fn test_mm_srlv_epi64() {
5339 let a = _mm_set1_epi64x(2);
5340 let count = _mm_set1_epi64x(1);
5341 let r = _mm_srlv_epi64(a, count);
5342 let e = _mm_set1_epi64x(1);
5343 assert_eq_m128i(r, e);
5344 }
5345
5346 #[simd_test(enable = "avx2")]
5347 const fn test_mm256_srlv_epi64() {
5348 let a = _mm256_set1_epi64x(2);
5349 let count = _mm256_set1_epi64x(1);
5350 let r = _mm256_srlv_epi64(a, count);
5351 let e = _mm256_set1_epi64x(1);
5352 assert_eq_m256i(r, e);
5353 }
5354
5355 #[simd_test(enable = "avx2")]
5356 fn test_mm256_stream_load_si256() {
5357 let a = _mm256_set_epi64x(5, 6, 7, 8);
5358 let r = unsafe { _mm256_stream_load_si256(core::ptr::addr_of!(a) as *const _) };
5359 assert_eq_m256i(a, r);
5360 }
5361
5362 #[simd_test(enable = "avx2")]
5363 const fn test_mm256_sub_epi16() {
5364 let a = _mm256_set1_epi16(4);
5365 let b = _mm256_set1_epi16(2);
5366 let r = _mm256_sub_epi16(a, b);
5367 assert_eq_m256i(r, b);
5368 }
5369
5370 #[simd_test(enable = "avx2")]
5371 const fn test_mm256_sub_epi32() {
5372 let a = _mm256_set1_epi32(4);
5373 let b = _mm256_set1_epi32(2);
5374 let r = _mm256_sub_epi32(a, b);
5375 assert_eq_m256i(r, b);
5376 }
5377
5378 #[simd_test(enable = "avx2")]
5379 const fn test_mm256_sub_epi64() {
5380 let a = _mm256_set1_epi64x(4);
5381 let b = _mm256_set1_epi64x(2);
5382 let r = _mm256_sub_epi64(a, b);
5383 assert_eq_m256i(r, b);
5384 }
5385
5386 #[simd_test(enable = "avx2")]
5387 const fn test_mm256_sub_epi8() {
5388 let a = _mm256_set1_epi8(4);
5389 let b = _mm256_set1_epi8(2);
5390 let r = _mm256_sub_epi8(a, b);
5391 assert_eq_m256i(r, b);
5392 }
5393
5394 #[simd_test(enable = "avx2")]
5395 const fn test_mm256_subs_epi16() {
5396 let a = _mm256_set1_epi16(4);
5397 let b = _mm256_set1_epi16(2);
5398 let r = _mm256_subs_epi16(a, b);
5399 assert_eq_m256i(r, b);
5400 }
5401
5402 #[simd_test(enable = "avx2")]
5403 const fn test_mm256_subs_epi8() {
5404 let a = _mm256_set1_epi8(4);
5405 let b = _mm256_set1_epi8(2);
5406 let r = _mm256_subs_epi8(a, b);
5407 assert_eq_m256i(r, b);
5408 }
5409
5410 #[simd_test(enable = "avx2")]
5411 const fn test_mm256_subs_epu16() {
5412 let a = _mm256_set1_epi16(4);
5413 let b = _mm256_set1_epi16(2);
5414 let r = _mm256_subs_epu16(a, b);
5415 assert_eq_m256i(r, b);
5416 }
5417
5418 #[simd_test(enable = "avx2")]
5419 const fn test_mm256_subs_epu8() {
5420 let a = _mm256_set1_epi8(4);
5421 let b = _mm256_set1_epi8(2);
5422 let r = _mm256_subs_epu8(a, b);
5423 assert_eq_m256i(r, b);
5424 }
5425
5426 #[simd_test(enable = "avx2")]
5427 const fn test_mm256_xor_si256() {
5428 let a = _mm256_set1_epi8(5);
5429 let b = _mm256_set1_epi8(3);
5430 let r = _mm256_xor_si256(a, b);
5431 assert_eq_m256i(r, _mm256_set1_epi8(6));
5432 }
5433
5434 #[simd_test(enable = "avx2")]
5435 const fn test_mm256_alignr_epi8() {
5436 #[rustfmt::skip]
5437 let a = _mm256_setr_epi8(
5438 1, 2, 3, 4, 5, 6, 7, 8,
5439 9, 10, 11, 12, 13, 14, 15, 16,
5440 17, 18, 19, 20, 21, 22, 23, 24,
5441 25, 26, 27, 28, 29, 30, 31, 32,
5442 );
5443 #[rustfmt::skip]
5444 let b = _mm256_setr_epi8(
5445 -1, -2, -3, -4, -5, -6, -7, -8,
5446 -9, -10, -11, -12, -13, -14, -15, -16,
5447 -17, -18, -19, -20, -21, -22, -23, -24,
5448 -25, -26, -27, -28, -29, -30, -31, -32,
5449 );
5450 let r = _mm256_alignr_epi8::<33>(a, b);
5451 assert_eq_m256i(r, _mm256_set1_epi8(0));
5452
5453 let r = _mm256_alignr_epi8::<17>(a, b);
5454 #[rustfmt::skip]
5455 let expected = _mm256_setr_epi8(
5456 2, 3, 4, 5, 6, 7, 8, 9,
5457 10, 11, 12, 13, 14, 15, 16, 0,
5458 18, 19, 20, 21, 22, 23, 24, 25,
5459 26, 27, 28, 29, 30, 31, 32, 0,
5460 );
5461 assert_eq_m256i(r, expected);
5462
5463 let r = _mm256_alignr_epi8::<4>(a, b);
5464 #[rustfmt::skip]
5465 let expected = _mm256_setr_epi8(
5466 -5, -6, -7, -8, -9, -10, -11, -12,
5467 -13, -14, -15, -16, 1, 2, 3, 4,
5468 -21, -22, -23, -24, -25, -26, -27, -28,
5469 -29, -30, -31, -32, 17, 18, 19, 20,
5470 );
5471 assert_eq_m256i(r, expected);
5472
5473 let r = _mm256_alignr_epi8::<15>(a, b);
5474 #[rustfmt::skip]
5475 let expected = _mm256_setr_epi8(
5476 -16, 1, 2, 3, 4, 5, 6, 7,
5477 8, 9, 10, 11, 12, 13, 14, 15,
5478 -32, 17, 18, 19, 20, 21, 22, 23,
5479 24, 25, 26, 27, 28, 29, 30, 31,
5480 );
5481 assert_eq_m256i(r, expected);
5482
5483 let r = _mm256_alignr_epi8::<0>(a, b);
5484 assert_eq_m256i(r, b);
5485
5486 let r = _mm256_alignr_epi8::<16>(a, b);
5487 assert_eq_m256i(r, a);
5488 }
5489
5490 #[simd_test(enable = "avx2")]
5491 fn test_mm256_shuffle_epi8() {
5492 #[rustfmt::skip]
5493 let a = _mm256_setr_epi8(
5494 1, 2, 3, 4, 5, 6, 7, 8,
5495 9, 10, 11, 12, 13, 14, 15, 16,
5496 17, 18, 19, 20, 21, 22, 23, 24,
5497 25, 26, 27, 28, 29, 30, 31, 32,
5498 );
5499 #[rustfmt::skip]
5500 let b = _mm256_setr_epi8(
5501 4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
5502 12, 5, 5, 10, 4, 1, 8, 0,
5503 4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
5504 12, 5, 5, 10, 4, 1, 8, 0,
5505 );
5506 #[rustfmt::skip]
5507 let expected = _mm256_setr_epi8(
5508 5, 0, 5, 4, 9, 13, 7, 4,
5509 13, 6, 6, 11, 5, 2, 9, 1,
5510 21, 0, 21, 20, 25, 29, 23, 20,
5511 29, 22, 22, 27, 21, 18, 25, 17,
5512 );
5513 let r = _mm256_shuffle_epi8(a, b);
5514 assert_eq_m256i(r, expected);
5515 }
5516
5517 #[simd_test(enable = "avx2")]
5518 fn test_mm256_permutevar8x32_epi32() {
5519 let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
5520 let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
5521 let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500);
5522 let r = _mm256_permutevar8x32_epi32(a, b);
5523 assert_eq_m256i(r, expected);
5524 }
5525
5526 #[simd_test(enable = "avx2")]
5527 const fn test_mm256_permute4x64_epi64() {
5528 let a = _mm256_setr_epi64x(100, 200, 300, 400);
5529 let expected = _mm256_setr_epi64x(400, 100, 200, 100);
5530 let r = _mm256_permute4x64_epi64::<0b00010011>(a);
5531 assert_eq_m256i(r, expected);
5532 }
5533
5534 #[simd_test(enable = "avx2")]
5535 const fn test_mm256_permute2x128_si256() {
5536 let a = _mm256_setr_epi64x(100, 200, 500, 600);
5537 let b = _mm256_setr_epi64x(300, 400, 700, 800);
5538 let r = _mm256_permute2x128_si256::<0b00_01_00_11>(a, b);
5539 let e = _mm256_setr_epi64x(700, 800, 500, 600);
5540 assert_eq_m256i(r, e);
5541 }
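// A small illustrative check (immediate chosen arbitrarily): bits 3 and 7 of the
// `_mm256_permute2x128_si256` immediate zero the corresponding 128-bit half instead of
// selecting one, so 0b1000_0001 copies the upper half of `a` into the low half of the
// result and zeroes the upper half.
#[simd_test(enable = "avx2")]
fn test_mm256_permute2x128_si256_zero_half() {
    let a = _mm256_setr_epi64x(100, 200, 500, 600);
    let b = _mm256_setr_epi64x(300, 400, 700, 800);
    let r = _mm256_permute2x128_si256::<0b1000_0001>(a, b);
    let e = _mm256_setr_epi64x(500, 600, 0, 0);
    assert_eq_m256i(r, e);
}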
5542
5543 #[simd_test(enable = "avx2")]
5544 const fn test_mm256_permute4x64_pd() {
5545 let a = _mm256_setr_pd(1., 2., 3., 4.);
5546 let r = _mm256_permute4x64_pd::<0b00_01_00_11>(a);
5547 let e = _mm256_setr_pd(4., 1., 2., 1.);
5548 assert_eq_m256d(r, e);
5549 }
5550
5551 #[simd_test(enable = "avx2")]
5552 fn test_mm256_permutevar8x32_ps() {
5553 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
5554 let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
5555 let r = _mm256_permutevar8x32_ps(a, b);
5556 let e = _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.);
5557 assert_eq_m256(r, e);
5558 }
5559
5560 #[simd_test(enable = "avx2")]
5561 fn test_mm_i32gather_epi32() {
5562 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5563 // A multiplier of 4 is word-addressing
5564 let r = unsafe { _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
5565 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
5566 }
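// A small illustrative companion to the scale comment above (test name and offsets chosen
// here): with a scale of 1 the gather indices are byte offsets, so reaching every 16th i32
// means stepping by 64 bytes rather than by 16 elements.
#[simd_test(enable = "avx2")]
fn test_mm_i32gather_epi32_byte_scale() {
    let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
    let r = unsafe { _mm_i32gather_epi32::<1>(arr.as_ptr(), _mm_setr_epi32(0, 64, 128, 192)) };
    assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
}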
5567
5568 #[simd_test(enable = "avx2")]
5569 fn test_mm_mask_i32gather_epi32() {
5570 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5571 // A multiplier of 4 is word-addressing
5572 let r = unsafe {
5573 _mm_mask_i32gather_epi32::<4>(
5574 _mm_set1_epi32(256),
5575 arr.as_ptr(),
5576 _mm_setr_epi32(0, 16, 64, 96),
5577 _mm_setr_epi32(-1, -1, -1, 0),
5578 )
5579 };
5580 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
5581 }
5582
5583 #[simd_test(enable = "avx2")]
5584 fn test_mm256_i32gather_epi32() {
5585 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5586 // A multiplier of 4 is word-addressing
5587 let r = unsafe {
5588 _mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4))
5589 };
5590 assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5591 }
5592
5593 #[simd_test(enable = "avx2")]
5594 fn test_mm256_mask_i32gather_epi32() {
5595 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5596 // A multiplier of 4 is word-addressing
5597 let r = unsafe {
5598 _mm256_mask_i32gather_epi32::<4>(
5599 _mm256_set1_epi32(256),
5600 arr.as_ptr(),
5601 _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
5602 _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
5603 )
5604 };
5605 assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
5606 }
5607
5608 #[simd_test(enable = "avx2")]
5609 fn test_mm_i32gather_ps() {
5610 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5611 // A multiplier of 4 is word-addressing for f32s
5612 let r = unsafe { _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
5613 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
5614 }
5615
5616 #[simd_test(enable = "avx2")]
5617 fn test_mm_mask_i32gather_ps() {
5618 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5619 // A multiplier of 4 is word-addressing for f32s
5620 let r = unsafe {
5621 _mm_mask_i32gather_ps::<4>(
5622 _mm_set1_ps(256.0),
5623 arr.as_ptr(),
5624 _mm_setr_epi32(0, 16, 64, 96),
5625 _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
5626 )
5627 };
5628 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
5629 }
5630
5631 #[simd_test(enable = "avx2")]
5632 fn test_mm256_i32gather_ps() {
5633 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5634 // A multiplier of 4 is word-addressing for f32s
5635 let r = unsafe {
5636 _mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4))
5637 };
5638 assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
5639 }
5640
5641 #[simd_test(enable = "avx2")]
5642 fn test_mm256_mask_i32gather_ps() {
5643 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5644 // A multiplier of 4 is word-addressing for f32s
5645 let r = unsafe {
5646 _mm256_mask_i32gather_ps::<4>(
5647 _mm256_set1_ps(256.0),
5648 arr.as_ptr(),
5649 _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
5650 _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
5651 )
5652 };
5653 assert_eq_m256(
5654 r,
5655 _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
5656 );
5657 }
5658
5659 #[simd_test(enable = "avx2")]
5660 fn test_mm_i32gather_epi64() {
5661 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5662 // A multiplier of 8 is word-addressing for i64s
5663 let r = unsafe { _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0)) };
5664 assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
5665 }
5666
5667 #[simd_test(enable = "avx2")]
5668 fn test_mm_mask_i32gather_epi64() {
5669 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5670 // A multiplier of 8 is word-addressing for i64s
5671 let r = unsafe {
5672 _mm_mask_i32gather_epi64::<8>(
5673 _mm_set1_epi64x(256),
5674 arr.as_ptr(),
5675 _mm_setr_epi32(16, 16, 16, 16),
5676 _mm_setr_epi64x(-1, 0),
5677 )
5678 };
5679 assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
5680 }
5681
5682 #[simd_test(enable = "avx2")]
5683 fn test_mm256_i32gather_epi64() {
5684 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5685 // A multiplier of 8 is word-addressing for i64s
5686 let r = unsafe { _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
5687 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
5688 }
5689
5690 #[simd_test(enable = "avx2")]
5691 fn test_mm256_mask_i32gather_epi64() {
5692 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5693 // A multiplier of 8 is word-addressing for i64s
5694 let r = unsafe {
5695 _mm256_mask_i32gather_epi64::<8>(
5696 _mm256_set1_epi64x(256),
5697 arr.as_ptr(),
5698 _mm_setr_epi32(0, 16, 64, 96),
5699 _mm256_setr_epi64x(-1, -1, -1, 0),
5700 )
5701 };
5702 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
5703 }
5704
5705 #[simd_test(enable = "avx2")]
5706 fn test_mm_i32gather_pd() {
5707 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5708 // A multiplier of 8 is word-addressing for f64s
5709 let r = unsafe { _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0)) };
5710 assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
5711 }
5712
5713 #[simd_test(enable = "avx2")]
5714 fn test_mm_mask_i32gather_pd() {
5715 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5716 // A multiplier of 8 is word-addressing for f64s
5717 let r = unsafe {
5718 _mm_mask_i32gather_pd::<8>(
5719 _mm_set1_pd(256.0),
5720 arr.as_ptr(),
5721 _mm_setr_epi32(16, 16, 16, 16),
5722 _mm_setr_pd(-1.0, 0.0),
5723 )
5724 };
5725 assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
5726 }
5727
5728 #[simd_test(enable = "avx2")]
5729 fn test_mm256_i32gather_pd() {
5730 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5731 // A multiplier of 8 is word-addressing for f64s
5732 let r = unsafe { _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
5733 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
5734 }
5735
5736 #[simd_test(enable = "avx2")]
5737 fn test_mm256_mask_i32gather_pd() {
5738 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5739 // A multiplier of 8 is word-addressing for f64s
5740 let r = unsafe {
5741 _mm256_mask_i32gather_pd::<8>(
5742 _mm256_set1_pd(256.0),
5743 arr.as_ptr(),
5744 _mm_setr_epi32(0, 16, 64, 96),
5745 _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
5746 )
5747 };
5748 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
5749 }
5750
5751 #[simd_test(enable = "avx2")]
5752 fn test_mm_i64gather_epi32() {
5753 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5754 // A multiplier of 4 is word-addressing
5755 let r = unsafe { _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
5756 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
5757 }
5758
5759 #[simd_test(enable = "avx2")]
5760 fn test_mm_mask_i64gather_epi32() {
5761 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5762 // A multiplier of 4 is word-addressing
5763 let r = unsafe {
5764 _mm_mask_i64gather_epi32::<4>(
5765 _mm_set1_epi32(256),
5766 arr.as_ptr(),
5767 _mm_setr_epi64x(0, 16),
5768 _mm_setr_epi32(-1, 0, -1, 0),
5769 )
5770 };
5771 assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0));
5772 }
5773
5774 #[simd_test(enable = "avx2")]
5775 fn test_mm256_i64gather_epi32() {
5776 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5777 // A multiplier of 4 is word-addressing
5778 let r =
5779 unsafe { _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
5780 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
5781 }
5782
5783 #[simd_test(enable = "avx2")]
5784 fn test_mm256_mask_i64gather_epi32() {
5785 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5786 // A multiplier of 4 is word-addressing
5787 let r = unsafe {
5788 _mm256_mask_i64gather_epi32::<4>(
5789 _mm_set1_epi32(256),
5790 arr.as_ptr(),
5791 _mm256_setr_epi64x(0, 16, 64, 96),
5792 _mm_setr_epi32(-1, -1, -1, 0),
5793 )
5794 };
5795 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
5796 }
5797
5798 #[simd_test(enable = "avx2")]
5799 fn test_mm_i64gather_ps() {
5800 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5801 // A multiplier of 4 is word-addressing for f32s
5802 let r = unsafe { _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
5803 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
5804 }
5805
5806 #[simd_test(enable = "avx2")]
5807 fn test_mm_mask_i64gather_ps() {
5808 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5809 // A multiplier of 4 is word-addressing for f32s
5810 let r = unsafe {
5811 _mm_mask_i64gather_ps::<4>(
5812 _mm_set1_ps(256.0),
5813 arr.as_ptr(),
5814 _mm_setr_epi64x(0, 16),
5815 _mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
5816 )
5817 };
5818 assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
5819 }
5820
5821 #[simd_test(enable = "avx2")]
5822 fn test_mm256_i64gather_ps() {
5823 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5824 // A multiplier of 4 is word-addressing for f32s
5825 let r =
5826 unsafe { _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
5827 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
5828 }
5829
5830 #[simd_test(enable = "avx2")]
5831 fn test_mm256_mask_i64gather_ps() {
5832 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5833 // A multiplier of 4 is word-addressing for f32s
5834 let r = unsafe {
5835 _mm256_mask_i64gather_ps::<4>(
5836 _mm_set1_ps(256.0),
5837 arr.as_ptr(),
5838 _mm256_setr_epi64x(0, 16, 64, 96),
5839 _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
5840 )
5841 };
5842 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
5843 }
5844
5845 #[simd_test(enable = "avx2")]
5846 fn test_mm_i64gather_epi64() {
5847 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5848 // A multiplier of 8 is word-addressing for i64s
5849 let r = unsafe { _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
5850 assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
5851 }
5852
5853 #[simd_test(enable = "avx2")]
5854 fn test_mm_mask_i64gather_epi64() {
5855 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5856 // A multiplier of 8 is word-addressing for i64s
5857 let r = unsafe {
5858 _mm_mask_i64gather_epi64::<8>(
5859 _mm_set1_epi64x(256),
5860 arr.as_ptr(),
5861 _mm_setr_epi64x(16, 16),
5862 _mm_setr_epi64x(-1, 0),
5863 )
5864 };
5865 assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
5866 }
5867
5868 #[simd_test(enable = "avx2")]
5869 fn test_mm256_i64gather_epi64() {
5870 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5871 // A multiplier of 8 is word-addressing for i64s
5872 let r =
5873 unsafe { _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
5874 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
5875 }
5876
5877 #[simd_test(enable = "avx2")]
5878 fn test_mm256_mask_i64gather_epi64() {
5879 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5880 // A multiplier of 8 is word-addressing for i64s
5881 let r = unsafe {
5882 _mm256_mask_i64gather_epi64::<8>(
5883 _mm256_set1_epi64x(256),
5884 arr.as_ptr(),
5885 _mm256_setr_epi64x(0, 16, 64, 96),
5886 _mm256_setr_epi64x(-1, -1, -1, 0),
5887 )
5888 };
5889 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
5890 }
5891
5892 #[simd_test(enable = "avx2")]
5893 fn test_mm_i64gather_pd() {
5894 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5895 // A multiplier of 8 is word-addressing for f64s
5896 let r = unsafe { _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
5897 assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
5898 }
5899
5900 #[simd_test(enable = "avx2")]
5901 fn test_mm_mask_i64gather_pd() {
5902 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5903 // A multiplier of 8 is word-addressing for f64s
5904 let r = unsafe {
5905 _mm_mask_i64gather_pd::<8>(
5906 _mm_set1_pd(256.0),
5907 arr.as_ptr(),
5908 _mm_setr_epi64x(16, 16),
5909 _mm_setr_pd(-1.0, 0.0),
5910 )
5911 };
5912 assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
5913 }
5914
5915 #[simd_test(enable = "avx2")]
5916 fn test_mm256_i64gather_pd() {
5917 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5918 // A multiplier of 8 is word-addressing for f64s
5919 let r =
5920 unsafe { _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
5921 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
5922 }
5923
5924 #[simd_test(enable = "avx2")]
5925 fn test_mm256_mask_i64gather_pd() {
5926 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5927 // A multiplier of 8 is word-addressing for f64s
5928 let r = unsafe {
5929 _mm256_mask_i64gather_pd::<8>(
5930 _mm256_set1_pd(256.0),
5931 arr.as_ptr(),
5932 _mm256_setr_epi64x(0, 16, 64, 96),
5933 _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
5934 )
5935 };
5936 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
5937 }
5938
5939 #[simd_test(enable = "avx2")]
5940 const fn test_mm256_extract_epi8() {
5941 #[rustfmt::skip]
5942 let a = _mm256_setr_epi8(
5943 -1, 1, 2, 3, 4, 5, 6, 7,
5944 8, 9, 10, 11, 12, 13, 14, 15,
5945 16, 17, 18, 19, 20, 21, 22, 23,
5946 24, 25, 26, 27, 28, 29, 30, 31
5947 );
5948 let r1 = _mm256_extract_epi8::<0>(a);
5949 let r2 = _mm256_extract_epi8::<3>(a);
5950 assert_eq!(r1, 0xFF);
5951 assert_eq!(r2, 3);
5952 }
5953
5954 #[simd_test(enable = "avx2")]
5955 const fn test_mm256_extract_epi16() {
5956 #[rustfmt::skip]
5957 let a = _mm256_setr_epi16(
5958 -1, 1, 2, 3, 4, 5, 6, 7,
5959 8, 9, 10, 11, 12, 13, 14, 15,
5960 );
5961 let r1 = _mm256_extract_epi16::<0>(a);
5962 let r2 = _mm256_extract_epi16::<3>(a);
5963 assert_eq!(r1, 0xFFFF);
5964 assert_eq!(r2, 3);
5965 }
5966}
5967