//! Advanced Vector Extensions 2 (AVX2)
//!
//! AVX2 expands most AVX commands to 256-bit wide vector registers and
//! adds [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate).
//!
//! The references are:
//!
//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
//!   Instruction Set Reference, A-Z][intel64_ref].
//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
//!   System Instructions][amd64_ref].
//!
//! Wikipedia's [AVX][wiki_avx] and [FMA][wiki_fma] pages provide a quick
//! overview of the instructions available.
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate

use crate::{
    core_arch::{simd::*, simd_llvm::*, x86::*},
    mem::transmute,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi32)
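///
/// # Example
///
/// A minimal usage sketch (illustrative, not part of the original
/// documentation); it assumes `std` and checks for AVX2 support at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_setr_epi32(-1, 2, -3, 4, -5, 6, -7, 8);
///             let r = _mm256_abs_epi32(a);
///             let mut out = [0i32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [1, 2, 3, 4, 5, 6, 7, 8]);
///         }
///     }
/// }
/// ```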
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_abs_epi32(a: __m256i) -> __m256i {
    transmute(pabsd(a.as_i32x8()))
}

/// Computes the absolute values of packed 16-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_abs_epi16(a: __m256i) -> __m256i {
    transmute(pabsw(a.as_i16x16()))
}

/// Computes the absolute values of packed 8-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_abs_epi8(a: __m256i) -> __m256i {
    transmute(pabsb(a.as_i8x32()))
}

/// Adds packed 64-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi64)
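///
/// # Example
///
/// An illustrative sketch (not from the original documentation) of lane-wise
/// 64-bit addition, assuming `std` and runtime AVX2 detection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_setr_epi64x(1, 2, 3, 4);
///             let b = _mm256_setr_epi64x(10, 20, 30, 40);
///             let r = _mm256_add_epi64(a, b);
///             let mut out = [0i64; 4];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [11, 22, 33, 44]);
///         }
///     }
/// }
/// ```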
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_add(a.as_i64x4(), b.as_i64x4()))
}

/// Adds packed 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_add(a.as_i32x8(), b.as_i32x8()))
}

/// Adds packed 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_add(a.as_i16x16(), b.as_i16x16()))
}

/// Adds packed 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_add(a.as_i8x32(), b.as_i8x32()))
}

/// Adds packed 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epi8)
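///
/// # Example
///
/// A small sketch (added for illustration) showing the saturating behaviour,
/// assuming `std` and runtime AVX2 detection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi8(120);
///             let b = _mm256_set1_epi8(20);
///             let r = _mm256_adds_epi8(a, b);
///             let mut out = [0i8; 32];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             // 120 + 20 saturates to i8::MAX = 127 in every lane.
///             assert!(out.iter().all(|&x| x == 127));
///         }
///     }
/// }
/// ```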
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32()))
}

/// Adds packed 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16()))
}

/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epu8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32()))
}

/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epu16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16()))
}

/// Concatenates pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary
/// result, shifts the result right by `IMM8` bytes, and returns the low 16 bytes.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if IMM8 > 32 {
        return _mm256_set1_epi8(0);
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b) = if IMM8 > 16 {
        (_mm256_set1_epi8(0), a)
    } else {
        (a, b)
    };

    let a = a.as_i8x32();
    let b = b.as_i8x32();

    let r: i8x32 = match IMM8 % 16 {
        0 => simd_shuffle!(
            b,
            a,
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                23, 24, 25, 26, 27, 28, 29, 30, 31,
            ],
        ),
        1 => simd_shuffle!(
            b,
            a,
            [
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22, 23,
                24, 25, 26, 27, 28, 29, 30, 31, 48,
            ],
        ),
        2 => simd_shuffle!(
            b,
            a,
            [
                2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 18, 19, 20, 21, 22, 23, 24,
                25, 26, 27, 28, 29, 30, 31, 48, 49,
            ],
        ),
        3 => simd_shuffle!(
            b,
            a,
            [
                3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 19, 20, 21, 22, 23, 24,
                25, 26, 27, 28, 29, 30, 31, 48, 49, 50,
            ],
        ),
        4 => simd_shuffle!(
            b,
            a,
            [
                4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 20, 21, 22, 23, 24, 25,
                26, 27, 28, 29, 30, 31, 48, 49, 50, 51,
            ],
        ),
        5 => simd_shuffle!(
            b,
            a,
            [
                5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 21, 22, 23, 24, 25, 26,
                27, 28, 29, 30, 31, 48, 49, 50, 51, 52,
            ],
        ),
        6 => simd_shuffle!(
            b,
            a,
            [
                6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 22, 23, 24, 25, 26, 27,
                28, 29, 30, 31, 48, 49, 50, 51, 52, 53,
            ],
        ),
        7 => simd_shuffle!(
            b,
            a,
            [
                7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 23, 24, 25, 26, 27,
                28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54,
            ],
        ),
        8 => simd_shuffle!(
            b,
            a,
            [
                8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 24, 25, 26, 27, 28,
                29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55,
            ],
        ),
        9 => simd_shuffle!(
            b,
            a,
            [
                9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 25, 26, 27, 28, 29,
                30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56,
            ],
        ),
        10 => simd_shuffle!(
            b,
            a,
            [
                10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 26, 27, 28, 29, 30,
                31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
            ],
        ),
        11 => simd_shuffle!(
            b,
            a,
            [
                11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 27, 28, 29, 30, 31,
                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
            ],
        ),
        12 => simd_shuffle!(
            b,
            a,
            [
                12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 28, 29, 30, 31, 48,
                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
            ],
        ),
        13 => simd_shuffle!(
            b,
            a,
            [
                13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 29, 30, 31, 48, 49,
                50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
            ],
        ),
        14 => simd_shuffle!(
            b,
            a,
            [
                14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 30, 31, 48, 49, 50,
                51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
            ],
        ),
        15 => simd_shuffle!(
            b,
            a,
            [
                15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 31, 48, 49, 50, 51,
                52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
            ],
        ),
        _ => b,
    };
    transmute(r)
}

/// Computes the bitwise AND of 256 bits (representing integer data)
/// in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_and(a.as_i64x4(), b.as_i64x4()))
}

/// Computes the bitwise NOT of 256 bits (representing integer data)
/// in `a` and then AND with `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_si256)
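///
/// # Example
///
/// An illustrative sketch (not part of the original documentation) of the
/// `(!a) & b` operation, assuming `std` and runtime AVX2 detection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi32(0b1100);
///             let b = _mm256_set1_epi32(0b1010);
///             // Every 32-bit lane becomes (!0b1100) & 0b1010 = 0b0010.
///             let r = _mm256_andnot_si256(a, b);
///             assert_eq!(_mm256_extract_epi32::<0>(r), 0b0010);
///         }
///     }
/// }
/// ```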
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
    let all_ones: __m256i = _mm256_set1_epi8(-1);
    transmute(simd_and(
        simd_xor(a.as_i64x4(), all_ones.as_i64x4()),
        b.as_i64x4(),
    ))
}

/// Averages packed unsigned 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_avg_epu16)
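///
/// # Example
///
/// A brief sketch (added here for illustration) showing that the average
/// rounds up, assuming `std` and runtime AVX2 detection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi16(5);
///             let b = _mm256_set1_epi16(6);
///             let r = _mm256_avg_epu16(a, b);
///             let mut out = [0u16; 16];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             // (5 + 6 + 1) >> 1 = 6: the rounding average of 5 and 6.
///             assert_eq!(out[0], 6);
///         }
///     }
/// }
/// ```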
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
    let a: u32x16 = simd_cast::<_, u32x16>(a.as_u16x16());
    let b: u32x16 = simd_cast::<_, u32x16>(b.as_u16x16());
    let r: u32x16 = simd_shr(simd_add(simd_add(a, b), u32x16::splat(1)), u32x16::splat(1));
    transmute(simd_cast::<_, u16x16>(r))
}

/// Averages packed unsigned 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_avg_epu8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
    let a: u16x32 = simd_cast::<_, u16x32>(a.as_u8x32());
    let b: u16x32 = simd_cast::<_, u16x32>(b.as_u8x32());
    let r: u16x32 = simd_shr(simd_add(simd_add(a, b), u16x32::splat(1)), u16x32::splat(1));
    transmute(simd_cast::<_, u8x32>(r))
}

/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM4`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM4, 4);
    let a: i32x4 = a.as_i32x4();
    let b: i32x4 = b.as_i32x4();
    let r: i32x4 = simd_shuffle!(
        a,
        b,
        [
            [0, 4, 0, 4][IMM4 as usize & 0b11],
            [1, 1, 5, 5][IMM4 as usize & 0b11],
            [2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11],
            [3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11],
        ],
    );
    transmute(r)
}

/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_epi32)
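///
/// # Example
///
/// An illustrative sketch (not from the original documentation) of how each
/// bit of the immediate selects a lane, assuming `std` and runtime AVX2
/// detection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi32(0);
///             let b = _mm256_set1_epi32(1);
///             // Bit i of the immediate picks element i from `b` (1) or `a` (0).
///             let r = _mm256_blend_epi32::<0b1010_1010>(a, b);
///             let mut out = [0i32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [0, 1, 0, 1, 0, 1, 0, 1]);
///         }
///     }
/// }
/// ```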
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x8 = a.as_i32x8();
    let b: i32x8 = b.as_i32x8();
    let r: i32x8 = simd_shuffle!(
        a,
        b,
        [
            [0, 8, 0, 8][IMM8 as usize & 0b11],
            [1, 1, 9, 9][IMM8 as usize & 0b11],
            [2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11],
            [3, 3, 11, 11][(IMM8 as usize >> 2) & 0b11],
            [4, 12, 4, 12][(IMM8 as usize >> 4) & 0b11],
            [5, 5, 13, 13][(IMM8 as usize >> 4) & 0b11],
            [6, 14, 6, 14][(IMM8 as usize >> 6) & 0b11],
            [7, 7, 15, 15][(IMM8 as usize >> 6) & 0b11],
        ],
    );
    transmute(r)
}

/// Blends packed 16-bit integers from `a` and `b` using control mask `IMM8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendw, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let a = a.as_i16x16();
    let b = b.as_i16x16();

    let r: i16x16 = simd_shuffle!(
        a,
        b,
        [
            [0, 16, 0, 16][IMM8 as usize & 0b11],
            [1, 1, 17, 17][IMM8 as usize & 0b11],
            [2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11],
            [3, 3, 19, 19][(IMM8 as usize >> 2) & 0b11],
            [4, 20, 4, 20][(IMM8 as usize >> 4) & 0b11],
            [5, 5, 21, 21][(IMM8 as usize >> 4) & 0b11],
            [6, 22, 6, 22][(IMM8 as usize >> 6) & 0b11],
            [7, 7, 23, 23][(IMM8 as usize >> 6) & 0b11],
            [8, 24, 8, 24][IMM8 as usize & 0b11],
            [9, 9, 25, 25][IMM8 as usize & 0b11],
            [10, 26, 10, 26][(IMM8 as usize >> 2) & 0b11],
            [11, 11, 27, 27][(IMM8 as usize >> 2) & 0b11],
            [12, 28, 12, 28][(IMM8 as usize >> 4) & 0b11],
            [13, 13, 29, 29][(IMM8 as usize >> 4) & 0b11],
            [14, 30, 14, 30][(IMM8 as usize >> 6) & 0b11],
            [15, 15, 31, 31][(IMM8 as usize >> 6) & 0b11],
        ],
    );
    transmute(r)
}

/// Blends packed 8-bit integers from `a` and `b` using `mask`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_epi8)
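///
/// # Example
///
/// A small illustrative sketch (not part of the original documentation); only
/// the sign bit of each mask byte matters. Assumes `std` and runtime AVX2
/// detection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi8(1);
///             let b = _mm256_set1_epi8(2);
///             // A byte is taken from `b` only where the mask byte has its
///             // most significant bit set; here that is every byte.
///             let mask = _mm256_set1_epi8(-1);
///             let r = _mm256_blendv_epi8(a, b, mask);
///             let mut out = [0i8; 32];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert!(out.iter().all(|&x| x == 2));
///         }
///     }
/// }
/// ```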
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
    let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::splat(0));
    transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32()))
}

/// Broadcasts the low packed 8-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastb_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
    let zero: __m128i = _mm_setzero_si128();
    let ret: i8x16 = simd_shuffle!(a.as_i8x16(), zero.as_i8x16(), [0_u32; 16]);
    transmute::<i8x16, _>(ret)
}

/// Broadcasts the low packed 8-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastb_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
    let zero: __m128i = _mm_setzero_si128();
    let ret: i8x32 = simd_shuffle!(a.as_i8x16(), zero.as_i8x16(), [0_u32; 32]);
    transmute::<i8x32, _>(ret)
}

// N.B., `simd_shuffle!` with integer data types for `a` and `b` is
// often compiled to `vbroadcastss`.
/// Broadcasts the low packed 32-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastd_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
    let zero: __m128i = _mm_setzero_si128();
    let ret: i32x4 = simd_shuffle!(a.as_i32x4(), zero.as_i32x4(), [0_u32; 4]);
    transmute::<i32x4, _>(ret)
}

// N.B., `simd_shuffle!` with integer data types for `a` and `b` is
// often compiled to `vbroadcastss`.
/// Broadcasts the low packed 32-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastd_epi32)
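///
/// # Example
///
/// A short sketch (added for illustration), assuming `std` and runtime AVX2
/// detection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm_setr_epi32(7, 1, 2, 3);
///             let r = _mm256_broadcastd_epi32(a);
///             let mut out = [0i32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             // Only the lowest 32-bit element of `a` is replicated.
///             assert_eq!(out, [7; 8]);
///         }
///     }
/// }
/// ```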
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
    let zero: __m128i = _mm_setzero_si128();
    let ret: i32x8 = simd_shuffle!(a.as_i32x4(), zero.as_i32x4(), [0_u32; 8]);
    transmute::<i32x8, _>(ret)
}

/// Broadcasts the low packed 64-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastq_epi64)
#[inline]
#[target_feature(enable = "avx2")]
// Emits `vmovddup` instead of `vpbroadcastq`
// See https://github.com/rust-lang/stdarch/issues/791
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
    let ret: i64x2 = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
    transmute::<i64x2, _>(ret)
}

/// Broadcasts the low packed 64-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastq_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
    let ret: i64x4 = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
    transmute::<i64x4, _>(ret)
}

/// Broadcasts the low double-precision (64-bit) floating-point element
/// from `a` to all elements of the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastsd_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
    simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2])
}

/// Broadcasts the low double-precision (64-bit) floating-point element
/// from `a` to all elements of the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastsd_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
    simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4])
}

// N.B., `broadcastsi128_si256` is often compiled to `vinsertf128` or
// `vbroadcastf128`.
/// Broadcasts 128 bits of integer data from `a` to all 128-bit lanes in
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastsi128_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
    let zero: __m128i = _mm_setzero_si128();
    let ret: i64x4 = simd_shuffle!(a.as_i64x2(), zero.as_i64x2(), [0, 1, 0, 1]);
    transmute::<i64x4, _>(ret)
}

/// Broadcasts the low single-precision (32-bit) floating-point element
/// from `a` to all elements of the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastss_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastss_ps(a: __m128) -> __m128 {
    simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4])
}

/// Broadcasts the low single-precision (32-bit) floating-point element
/// from `a` to all elements of the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastss_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
    simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8])
}

/// Broadcasts the low packed 16-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastw_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
    let zero: __m128i = _mm_setzero_si128();
    let ret: i16x8 = simd_shuffle!(a.as_i16x8(), zero.as_i16x8(), [0_u32; 8]);
    transmute::<i16x8, _>(ret)
}

/// Broadcasts the low packed 16-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastw_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
    let zero: __m128i = _mm_setzero_si128();
    let ret: i16x16 = simd_shuffle!(a.as_i16x8(), zero.as_i16x8(), [0_u32; 16]);
    transmute::<i16x16, _>(ret)
}

/// Compares packed 64-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i64x4, _>(simd_eq(a.as_i64x4(), b.as_i64x4()))
}

/// Compares packed 32-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8()))
}

/// Compares packed 16-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16()))
}

/// Compares packed 8-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32()))
}

/// Compares packed 64-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i64x4, _>(simd_gt(a.as_i64x4(), b.as_i64x4()))
}

/// Compares packed 32-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32)
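///
/// # Example
///
/// An illustrative sketch (not from the original documentation) showing the
/// all-ones/all-zeros result lanes, assuming `std` and runtime AVX2 detection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_setr_epi32(0, 5, 0, 5, 0, 5, 0, 5);
///             let b = _mm256_set1_epi32(3);
///             let r = _mm256_cmpgt_epi32(a, b);
///             let mut out = [0i32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             // Each lane is all ones (-1) where a > b, and 0 otherwise.
///             assert_eq!(out, [0, -1, 0, -1, 0, -1, 0, -1]);
///         }
///     }
/// }
/// ```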
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8()))
}

/// Compares packed 16-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16()))
}

/// Compares packed 8-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32()))
}

/// Sign-extend 16-bit integers to 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
    transmute::<i32x8, _>(simd_cast(a.as_i16x8()))
}

/// Sign-extend 16-bit integers to 64-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
    let a: i16x8 = a.as_i16x8();
    let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
    transmute::<i64x4, _>(simd_cast(v64))
}

/// Sign-extend 32-bit integers to 64-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
    transmute::<i64x4, _>(simd_cast(a.as_i32x4()))
}

/// Sign-extend 8-bit integers to 16-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
    transmute::<i16x16, _>(simd_cast(a.as_i8x16()))
}

/// Sign-extend 8-bit integers to 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
    let a: i8x16 = a.as_i8x16();
    let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute::<i32x8, _>(simd_cast(v64))
}

/// Sign-extend 8-bit integers to 64-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
    let a: i8x16 = a.as_i8x16();
    let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
    transmute::<i64x4, _>(simd_cast(v32))
}

/// Zero-extend unsigned 16-bit integers in `a` to 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu16_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
    transmute::<i32x8, _>(simd_cast(a.as_u16x8()))
}

/// Zero-extend the lower four unsigned 16-bit integers in `a` to 64-bit
/// integers. The upper four elements of `a` are unused.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu16_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
    let a: u16x8 = a.as_u16x8();
    let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
    transmute::<i64x4, _>(simd_cast(v64))
}

/// Zero-extend unsigned 32-bit integers in `a` to 64-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
    transmute::<i64x4, _>(simd_cast(a.as_u32x4()))
}

/// Zero-extend unsigned 8-bit integers in `a` to 16-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
    transmute::<i16x16, _>(simd_cast(a.as_u8x16()))
}

/// Zero-extend the lower eight unsigned 8-bit integers in `a` to 32-bit
/// integers. The upper eight elements of `a` are unused.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi32)
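///
/// # Example
///
/// A small sketch (added for illustration) contrasting zero-extension with
/// sign-extension, assuming `std` and runtime AVX2 detection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm_setr_epi8(
///                 200u8 as i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
///             );
///             let r = _mm256_cvtepu8_epi32(a);
///             let mut out = [0i32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             // The low eight bytes are zero-extended, so 0xC8 becomes 200,
///             // not -56 as sign-extension would give.
///             assert_eq!(out, [200, 1, 2, 3, 4, 5, 6, 7]);
///         }
///     }
/// }
/// ```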
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
    let a: u8x16 = a.as_u8x16();
    let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute::<i32x8, _>(simd_cast(v64))
}

/// Zero-extend the lower four unsigned 8-bit integers in `a` to 64-bit
/// integers. The upper twelve elements of `a` are unused.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
    let a: u8x16 = a.as_u8x16();
    let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
    transmute::<i64x4, _>(simd_cast(v32))
}

/// Extracts 128 bits (of integer data) from `a` selected with `IMM1`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti128_si256)
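///
/// # Example
///
/// An illustrative sketch (not part of the original documentation) extracting
/// the upper 128-bit half, assuming `std` and runtime AVX2 detection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_setr_epi64x(1, 2, 3, 4);
///             // IMM1 = 1 selects the upper 128 bits; IMM1 = 0 the lower.
///             let hi = _mm256_extracti128_si256::<1>(a);
///             let mut out = [0i64; 2];
///             _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, hi);
///             assert_eq!(out, [3, 4]);
///         }
///     }
/// }
/// ```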
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextractf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    let a: i64x4 = a.as_i64x4();
    let b: i64x4 = _mm256_undefined_si256().as_i64x4();
    let dst: i64x2 = simd_shuffle!(a, b, [[0, 1], [2, 3]][IMM1 as usize]);
    transmute(dst)
}

/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(phaddw(a.as_i16x16(), b.as_i16x16()))
}

/// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_epi32)
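///
/// # Example
///
/// A sketch (added for illustration) showing the per-128-bit-lane pairing,
/// assuming `std` and runtime AVX2 detection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
///             let b = _mm256_setr_epi32(10, 20, 30, 40, 50, 60, 70, 80);
///             let r = _mm256_hadd_epi32(a, b);
///             let mut out = [0i32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             // Pairs are summed within each 128-bit lane: the low lane
///             // holds the sums from the low halves of `a` and `b`, the
///             // high lane holds the sums from the high halves.
///             assert_eq!(out, [3, 7, 30, 70, 11, 15, 110, 150]);
///         }
///     }
/// }
/// ```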
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(phaddd(a.as_i32x8(), b.as_i32x8()))
}

/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadds_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(phaddsw(a.as_i16x16(), b.as_i16x16()))
}

/// Horizontally subtracts adjacent pairs of 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(phsubw(a.as_i16x16(), b.as_i16x16()))
}

/// Horizontally subtracts adjacent pairs of 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(phsubd(a.as_i32x8(), b.as_i32x8()))
}

/// Horizontally subtracts adjacent pairs of 16-bit integers in `a` and `b`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsubs_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(phsubsw(a.as_i16x16(), b.as_i16x16()))
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_epi32)
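///
/// # Example
///
/// An illustrative sketch (not from the original documentation) gathering
/// from an in-bounds table, assuming `std` and runtime AVX2 detection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let table: [i32; 8] = [10, 11, 12, 13, 14, 15, 16, 17];
///             // Gather elements 0, 2, 4 and 6; SCALE = 4 because each
///             // element is 4 bytes wide.
///             let offsets = _mm_setr_epi32(0, 2, 4, 6);
///             let r = _mm_i32gather_epi32::<4>(table.as_ptr(), offsets);
///             let mut out = [0i32; 4];
///             _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///             assert_eq!(out, [10, 12, 14, 16]);
///         }
///     }
/// }
/// ```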
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    let neg_one: i32x4 = _mm_set1_epi32(-1).as_i32x4();
    let offsets: i32x4 = offsets.as_i32x4();
    let slice: *const i8 = slice as *const i8;
    let r: i32x4 = pgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. Elements whose corresponding `mask`
/// element does not have its highest bit set are copied from `src` instead of
/// being loaded from memory.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_epi32)
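///
/// # Example
///
/// A small sketch (added for illustration) of the masking behaviour, assuming
/// `std` and runtime AVX2 detection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let table: [i32; 4] = [10, 11, 12, 13];
///             let src = _mm_set1_epi32(-1);
///             let offsets = _mm_setr_epi32(0, 1, 2, 3);
///             // Only lanes whose mask element has its highest bit set are
///             // gathered; the others keep the value from `src`.
///             let mask = _mm_setr_epi32(-1, 0, -1, 0);
///             let r = _mm_mask_i32gather_epi32::<4>(src, table.as_ptr(), offsets, mask);
///             let mut out = [0i32; 4];
///             _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///             assert_eq!(out, [10, -1, 12, -1]);
///         }
///     }
/// }
/// ```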
1002#[inline]
1003#[target_feature(enable = "avx2")]
1004#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
1005#[rustc_legacy_const_generics(4)]
1006#[stable(feature = "simd_x86", since = "1.27.0")]
1007pub unsafe fn _mm_mask_i32gather_epi32<const SCALE: i32>(
1008 src: __m128i,
1009 slice: *const i32,
1010 offsets: __m128i,
1011 mask: __m128i,
1012) -> __m128i {
1013 static_assert_imm8_scale!(SCALE);
1014 let src: i32x4 = src.as_i32x4();
1015 let mask: i32x4 = mask.as_i32x4();
1016 let offsets: i32x4 = offsets.as_i32x4();
1017 let slice: *const i8 = slice as *const i8;
1018 let r: i32x4 = pgatherdd(src, slice, offsets, mask, SCALE as i8);
1019 transmute(src:r)
1020}
1021
1022/// Returns values from `slice` at offsets determined by `offsets * scale`,
1023/// where
1024/// `scale` should be 1, 2, 4 or 8.
1025///
1026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_epi32)
1027#[inline]
1028#[target_feature(enable = "avx2")]
1029#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
1030#[rustc_legacy_const_generics(2)]
1031#[stable(feature = "simd_x86", since = "1.27.0")]
1032pub unsafe fn _mm256_i32gather_epi32<const SCALE: i32>(
1033 slice: *const i32,
1034 offsets: __m256i,
1035) -> __m256i {
1036 static_assert_imm8_scale!(SCALE);
1037 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
1038 let neg_one: i32x8 = _mm256_set1_epi32(-1).as_i32x8();
1039 let offsets: i32x8 = offsets.as_i32x8();
1040 let slice: *const i8 = slice as *const i8;
1041 let r: i32x8 = vpgatherdd(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1042 transmute(src:r)
1043}
1044
1045/// Returns values from `slice` at offsets determined by `offsets * scale`,
1046/// where
1047/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1048/// that position instead.
1049///
1050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_epi32)
1051#[inline]
1052#[target_feature(enable = "avx2")]
1053#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
1054#[rustc_legacy_const_generics(4)]
1055#[stable(feature = "simd_x86", since = "1.27.0")]
1056pub unsafe fn _mm256_mask_i32gather_epi32<const SCALE: i32>(
1057 src: __m256i,
1058 slice: *const i32,
1059 offsets: __m256i,
1060 mask: __m256i,
1061) -> __m256i {
1062 static_assert_imm8_scale!(SCALE);
1063 let src: i32x8 = src.as_i32x8();
1064 let mask: i32x8 = mask.as_i32x8();
1065 let offsets: i32x8 = offsets.as_i32x8();
1066 let slice: *const i8 = slice as *const i8;
1067 let r: i32x8 = vpgatherdd(src, slice, offsets, mask, SCALE as i8);
1068 transmute(src:r)
1069}
1070
1071/// Returns values from `slice` at offsets determined by `offsets * scale`,
1072/// where
1073/// `scale` should be 1, 2, 4 or 8.
1074///
1075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_ps)
1076#[inline]
1077#[target_feature(enable = "avx2")]
1078#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1079#[rustc_legacy_const_generics(2)]
1080#[stable(feature = "simd_x86", since = "1.27.0")]
1081pub unsafe fn _mm_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
1082 static_assert_imm8_scale!(SCALE);
1083 let zero: __m128 = _mm_setzero_ps();
1084 let neg_one: __m128 = _mm_set1_ps(-1.0);
1085 let offsets: i32x4 = offsets.as_i32x4();
1086 let slice: *const i8 = slice as *const i8;
1087 pgatherdps(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1088}
1089
1090/// Returns values from `slice` at offsets determined by `offsets * scale`,
1091/// where
1092/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1093/// that position instead.
1094///
1095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_ps)
1096#[inline]
1097#[target_feature(enable = "avx2")]
1098#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1099#[rustc_legacy_const_generics(4)]
1100#[stable(feature = "simd_x86", since = "1.27.0")]
1101pub unsafe fn _mm_mask_i32gather_ps<const SCALE: i32>(
1102 src: __m128,
1103 slice: *const f32,
1104 offsets: __m128i,
1105 mask: __m128,
1106) -> __m128 {
1107 static_assert_imm8_scale!(SCALE);
1108 let offsets: i32x4 = offsets.as_i32x4();
1109 let slice: *const i8 = slice as *const i8;
1110 pgatherdps(src, slice, offsets, mask, SCALE as i8)
1111}
1112
1113/// Returns values from `slice` at offsets determined by `offsets * scale`,
1114/// where
1115/// `scale` should be 1, 2, 4 or 8.
1116///
1117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_ps)
1118#[inline]
1119#[target_feature(enable = "avx2")]
1120#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1121#[rustc_legacy_const_generics(2)]
1122#[stable(feature = "simd_x86", since = "1.27.0")]
1123pub unsafe fn _mm256_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m256 {
1124 static_assert_imm8_scale!(SCALE);
1125 let zero: __m256 = _mm256_setzero_ps();
1126 let neg_one: __m256 = _mm256_set1_ps(-1.0);
1127 let offsets: i32x8 = offsets.as_i32x8();
1128 let slice: *const i8 = slice as *const i8;
1129 vpgatherdps(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1130}
1131
1132/// Returns values from `slice` at offsets determined by `offsets * scale`,
1133/// where
1134/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1135/// that position instead.
1136///
1137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_ps)
1138#[inline]
1139#[target_feature(enable = "avx2")]
1140#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1141#[rustc_legacy_const_generics(4)]
1142#[stable(feature = "simd_x86", since = "1.27.0")]
1143pub unsafe fn _mm256_mask_i32gather_ps<const SCALE: i32>(
1144 src: __m256,
1145 slice: *const f32,
1146 offsets: __m256i,
1147 mask: __m256,
1148) -> __m256 {
1149 static_assert_imm8_scale!(SCALE);
1150 let offsets: i32x8 = offsets.as_i32x8();
1151 let slice: *const i8 = slice as *const i8;
1152 vpgatherdps(src, slice, offsets, mask, SCALE as i8)
1153}
1154
1155/// Returns values from `slice` at offsets determined by `offsets * scale`,
1156/// where
1157/// `scale` should be 1, 2, 4 or 8.
1158///
1159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_epi64)
1160#[inline]
1161#[target_feature(enable = "avx2")]
1162#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
1163#[rustc_legacy_const_generics(2)]
1164#[stable(feature = "simd_x86", since = "1.27.0")]
1165pub unsafe fn _mm_i32gather_epi64<const SCALE: i32>(
1166 slice: *const i64,
1167 offsets: __m128i,
1168) -> __m128i {
1169 static_assert_imm8_scale!(SCALE);
1170 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
1171 let neg_one: i64x2 = _mm_set1_epi64x(-1).as_i64x2();
1172 let offsets: i32x4 = offsets.as_i32x4();
1173 let slice: *const i8 = slice as *const i8;
1174 let r: i64x2 = pgatherdq(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1175 transmute(src:r)
1176}
1177
1178/// Returns values from `slice` at offsets determined by `offsets * scale`,
1179/// where
1180/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1181/// that position instead.
1182///
1183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_epi64)
1184#[inline]
1185#[target_feature(enable = "avx2")]
1186#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
1187#[rustc_legacy_const_generics(4)]
1188#[stable(feature = "simd_x86", since = "1.27.0")]
1189pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>(
1190 src: __m128i,
1191 slice: *const i64,
1192 offsets: __m128i,
1193 mask: __m128i,
1194) -> __m128i {
1195 static_assert_imm8_scale!(SCALE);
1196 let src: i64x2 = src.as_i64x2();
1197 let mask: i64x2 = mask.as_i64x2();
1198 let offsets: i32x4 = offsets.as_i32x4();
1199 let slice: *const i8 = slice as *const i8;
1200 let r: i64x2 = pgatherdq(src, slice, offsets, mask, SCALE as i8);
1201 transmute(src:r)
1202}
1203
1204/// Returns values from `slice` at offsets determined by `offsets * scale`,
1205/// where
1206/// `scale` should be 1, 2, 4 or 8.
1207///
1208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_epi64)
1209#[inline]
1210#[target_feature(enable = "avx2")]
1211#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
1212#[rustc_legacy_const_generics(2)]
1213#[stable(feature = "simd_x86", since = "1.27.0")]
1214pub unsafe fn _mm256_i32gather_epi64<const SCALE: i32>(
1215 slice: *const i64,
1216 offsets: __m128i,
1217) -> __m256i {
1218 static_assert_imm8_scale!(SCALE);
1219 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
1220 let neg_one: i64x4 = _mm256_set1_epi64x(-1).as_i64x4();
1221 let offsets: i32x4 = offsets.as_i32x4();
1222 let slice: *const i8 = slice as *const i8;
1223 let r: i64x4 = vpgatherdq(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1224 transmute(src:r)
1225}
1226
1227/// Returns values from `slice` at offsets determined by `offsets * scale`,
1228/// where
1229/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1230/// that position instead.
1231///
1232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_epi64)
1233#[inline]
1234#[target_feature(enable = "avx2")]
1235#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
1236#[rustc_legacy_const_generics(4)]
1237#[stable(feature = "simd_x86", since = "1.27.0")]
1238pub unsafe fn _mm256_mask_i32gather_epi64<const SCALE: i32>(
1239 src: __m256i,
1240 slice: *const i64,
1241 offsets: __m128i,
1242 mask: __m256i,
1243) -> __m256i {
1244 static_assert_imm8_scale!(SCALE);
1245 let src: i64x4 = src.as_i64x4();
1246 let mask: i64x4 = mask.as_i64x4();
1247 let offsets: i32x4 = offsets.as_i32x4();
1248 let slice: *const i8 = slice as *const i8;
1249 let r: i64x4 = vpgatherdq(src, slice, offsets, mask, SCALE as i8);
1250 transmute(src:r)
1251}
1252
1253/// Returns values from `slice` at offsets determined by `offsets * scale`,
1254/// where
1255/// `scale` should be 1, 2, 4 or 8.
1256///
1257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_pd)
1258#[inline]
1259#[target_feature(enable = "avx2")]
1260#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
1261#[rustc_legacy_const_generics(2)]
1262#[stable(feature = "simd_x86", since = "1.27.0")]
1263pub unsafe fn _mm_i32gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
1264 static_assert_imm8_scale!(SCALE);
1265 let zero: __m128d = _mm_setzero_pd();
1266 let neg_one: __m128d = _mm_set1_pd(-1.0);
1267 let offsets: i32x4 = offsets.as_i32x4();
1268 let slice: *const i8 = slice as *const i8;
1269 pgatherdpd(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1270}
1271
1272/// Returns values from `slice` at offsets determined by `offsets * scale`,
1273/// where
1274/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1275/// that position instead.
1276///
1277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_pd)
1278#[inline]
1279#[target_feature(enable = "avx2")]
1280#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
1281#[rustc_legacy_const_generics(4)]
1282#[stable(feature = "simd_x86", since = "1.27.0")]
1283pub unsafe fn _mm_mask_i32gather_pd<const SCALE: i32>(
1284 src: __m128d,
1285 slice: *const f64,
1286 offsets: __m128i,
1287 mask: __m128d,
1288) -> __m128d {
1289 static_assert_imm8_scale!(SCALE);
1290 let offsets: i32x4 = offsets.as_i32x4();
1291 let slice: *const i8 = slice as *const i8;
1292 pgatherdpd(src, slice, offsets, mask, SCALE as i8)
1293}
1294
1295/// Returns values from `slice` at offsets determined by `offsets * scale`,
1296/// where
1297/// `scale` should be 1, 2, 4 or 8.
1298///
1299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_pd)
1300#[inline]
1301#[target_feature(enable = "avx2")]
1302#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
1303#[rustc_legacy_const_generics(2)]
1304#[stable(feature = "simd_x86", since = "1.27.0")]
1305pub unsafe fn _mm256_i32gather_pd<const SCALE: i32>(
1306 slice: *const f64,
1307 offsets: __m128i,
1308) -> __m256d {
1309 static_assert_imm8_scale!(SCALE);
1310 let zero: __m256d = _mm256_setzero_pd();
1311 let neg_one: __m256d = _mm256_set1_pd(-1.0);
1312 let offsets: i32x4 = offsets.as_i32x4();
1313 let slice: *const i8 = slice as *const i8;
1314 vpgatherdpd(src:zero, slice, offsets, mask:neg_one, SCALE as i8)
1315}
1316
1317/// Returns values from `slice` at offsets determined by `offsets * scale`,
1318/// where
1319/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1320/// that position instead.
1321///
1322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_pd)
1323#[inline]
1324#[target_feature(enable = "avx2")]
1325#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
1326#[rustc_legacy_const_generics(4)]
1327#[stable(feature = "simd_x86", since = "1.27.0")]
1328pub unsafe fn _mm256_mask_i32gather_pd<const SCALE: i32>(
1329 src: __m256d,
1330 slice: *const f64,
1331 offsets: __m128i,
1332 mask: __m256d,
1333) -> __m256d {
1334 static_assert_imm8_scale!(SCALE);
1335 let offsets: i32x4 = offsets.as_i32x4();
1336 let slice: *const i8 = slice as *const i8;
1337 vpgatherdpd(src, slice, offsets, mask, SCALE as i8)
1338}
1339
1340/// Returns values from `slice` at offsets determined by `offsets * scale`,
1341/// where
1342/// `scale` should be 1, 2, 4 or 8.
1343///
1344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_epi32)
1345#[inline]
1346#[target_feature(enable = "avx2")]
1347#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
1348#[rustc_legacy_const_generics(2)]
1349#[stable(feature = "simd_x86", since = "1.27.0")]
1350pub unsafe fn _mm_i64gather_epi32<const SCALE: i32>(
1351 slice: *const i32,
1352 offsets: __m128i,
1353) -> __m128i {
1354 static_assert_imm8_scale!(SCALE);
1355 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
1356 let neg_one: i32x4 = _mm_set1_epi64x(-1).as_i32x4();
1357 let offsets: i64x2 = offsets.as_i64x2();
1358 let slice: *const i8 = slice as *const i8;
1359 let r: i32x4 = pgatherqd(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1360 transmute(src:r)
1361}
1362
1363/// Returns values from `slice` at offsets determined by `offsets * scale`,
1364/// where
1365/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1366/// that position instead.
1367///
1368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_epi32)
1369#[inline]
1370#[target_feature(enable = "avx2")]
1371#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
1372#[rustc_legacy_const_generics(4)]
1373#[stable(feature = "simd_x86", since = "1.27.0")]
1374pub unsafe fn _mm_mask_i64gather_epi32<const SCALE: i32>(
1375 src: __m128i,
1376 slice: *const i32,
1377 offsets: __m128i,
1378 mask: __m128i,
1379) -> __m128i {
1380 static_assert_imm8_scale!(SCALE);
1381 let src: i32x4 = src.as_i32x4();
1382 let mask: i32x4 = mask.as_i32x4();
1383 let offsets: i64x2 = offsets.as_i64x2();
1384 let slice: *const i8 = slice as *const i8;
1385 let r: i32x4 = pgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
1387}
1388
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
1392///
1393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_epi32)
1394#[inline]
1395#[target_feature(enable = "avx2")]
1396#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
1397#[rustc_legacy_const_generics(2)]
1398#[stable(feature = "simd_x86", since = "1.27.0")]
1399pub unsafe fn _mm256_i64gather_epi32<const SCALE: i32>(
1400 slice: *const i32,
1401 offsets: __m256i,
1402) -> __m128i {
1403 static_assert_imm8_scale!(SCALE);
1404 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
1405 let neg_one: i32x4 = _mm_set1_epi64x(-1).as_i32x4();
1406 let offsets: i64x4 = offsets.as_i64x4();
1407 let slice: *const i8 = slice as *const i8;
    let r: i32x4 = vpgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
1410}
1411
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value is loaded from `src`
/// in that position instead.
1416///
1417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_epi32)
1418#[inline]
1419#[target_feature(enable = "avx2")]
1420#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
1421#[rustc_legacy_const_generics(4)]
1422#[stable(feature = "simd_x86", since = "1.27.0")]
1423pub unsafe fn _mm256_mask_i64gather_epi32<const SCALE: i32>(
1424 src: __m128i,
1425 slice: *const i32,
1426 offsets: __m256i,
1427 mask: __m128i,
1428) -> __m128i {
1429 static_assert_imm8_scale!(SCALE);
1430 let src: i32x4 = src.as_i32x4();
1431 let mask: i32x4 = mask.as_i32x4();
1432 let offsets: i64x4 = offsets.as_i64x4();
1433 let slice: *const i8 = slice as *const i8;
1434 let r: i32x4 = vpgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
1436}
1437
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
1441///
1442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_ps)
1443#[inline]
1444#[target_feature(enable = "avx2")]
1445#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
1446#[rustc_legacy_const_generics(2)]
1447#[stable(feature = "simd_x86", since = "1.27.0")]
1448pub unsafe fn _mm_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
1449 static_assert_imm8_scale!(SCALE);
1450 let zero: __m128 = _mm_setzero_ps();
1451 let neg_one: __m128 = _mm_set1_ps(-1.0);
1452 let offsets: i64x2 = offsets.as_i64x2();
1453 let slice: *const i8 = slice as *const i8;
    pgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
1455}
1456
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value is loaded from `src`
/// in that position instead.
1461///
1462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_ps)
1463#[inline]
1464#[target_feature(enable = "avx2")]
1465#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
1466#[rustc_legacy_const_generics(4)]
1467#[stable(feature = "simd_x86", since = "1.27.0")]
1468pub unsafe fn _mm_mask_i64gather_ps<const SCALE: i32>(
1469 src: __m128,
1470 slice: *const f32,
1471 offsets: __m128i,
1472 mask: __m128,
1473) -> __m128 {
1474 static_assert_imm8_scale!(SCALE);
1475 let offsets: i64x2 = offsets.as_i64x2();
1476 let slice: *const i8 = slice as *const i8;
1477 pgatherqps(src, slice, offsets, mask, SCALE as i8)
1478}
1479
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
1483///
1484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_ps)
1485#[inline]
1486#[target_feature(enable = "avx2")]
1487#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
1488#[rustc_legacy_const_generics(2)]
1489#[stable(feature = "simd_x86", since = "1.27.0")]
1490pub unsafe fn _mm256_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m128 {
1491 static_assert_imm8_scale!(SCALE);
1492 let zero: __m128 = _mm_setzero_ps();
1493 let neg_one: __m128 = _mm_set1_ps(-1.0);
1494 let offsets: i64x4 = offsets.as_i64x4();
1495 let slice: *const i8 = slice as *const i8;
    vpgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
1497}
1498
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value is loaded from `src`
/// in that position instead.
1503///
1504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_ps)
1505#[inline]
1506#[target_feature(enable = "avx2")]
1507#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
1508#[rustc_legacy_const_generics(4)]
1509#[stable(feature = "simd_x86", since = "1.27.0")]
1510pub unsafe fn _mm256_mask_i64gather_ps<const SCALE: i32>(
1511 src: __m128,
1512 slice: *const f32,
1513 offsets: __m256i,
1514 mask: __m128,
1515) -> __m128 {
1516 static_assert_imm8_scale!(SCALE);
1517 let offsets: i64x4 = offsets.as_i64x4();
1518 let slice: *const i8 = slice as *const i8;
1519 vpgatherqps(src, slice, offsets, mask, SCALE as i8)
1520}
1521
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
1525///
1526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_epi64)
1527#[inline]
1528#[target_feature(enable = "avx2")]
1529#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
1530#[rustc_legacy_const_generics(2)]
1531#[stable(feature = "simd_x86", since = "1.27.0")]
1532pub unsafe fn _mm_i64gather_epi64<const SCALE: i32>(
1533 slice: *const i64,
1534 offsets: __m128i,
1535) -> __m128i {
1536 static_assert_imm8_scale!(SCALE);
1537 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
1538 let neg_one: i64x2 = _mm_set1_epi64x(-1).as_i64x2();
1539 let slice: *const i8 = slice as *const i8;
1540 let offsets: i64x2 = offsets.as_i64x2();
    let r: i64x2 = pgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
1543}
1544
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value is loaded from `src`
/// in that position instead.
1549///
1550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_epi64)
1551#[inline]
1552#[target_feature(enable = "avx2")]
1553#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
1554#[rustc_legacy_const_generics(4)]
1555#[stable(feature = "simd_x86", since = "1.27.0")]
1556pub unsafe fn _mm_mask_i64gather_epi64<const SCALE: i32>(
1557 src: __m128i,
1558 slice: *const i64,
1559 offsets: __m128i,
1560 mask: __m128i,
1561) -> __m128i {
1562 static_assert_imm8_scale!(SCALE);
1563 let src: i64x2 = src.as_i64x2();
1564 let mask: i64x2 = mask.as_i64x2();
1565 let offsets: i64x2 = offsets.as_i64x2();
1566 let slice: *const i8 = slice as *const i8;
1567 let r: i64x2 = pgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
1569}
1570
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
1574///
1575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_epi64)
1576#[inline]
1577#[target_feature(enable = "avx2")]
1578#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
1579#[rustc_legacy_const_generics(2)]
1580#[stable(feature = "simd_x86", since = "1.27.0")]
1581pub unsafe fn _mm256_i64gather_epi64<const SCALE: i32>(
1582 slice: *const i64,
1583 offsets: __m256i,
1584) -> __m256i {
1585 static_assert_imm8_scale!(SCALE);
1586 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
1587 let neg_one: i64x4 = _mm256_set1_epi64x(-1).as_i64x4();
1588 let slice: *const i8 = slice as *const i8;
1589 let offsets: i64x4 = offsets.as_i64x4();
    let r: i64x4 = vpgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
1592}
1593
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value is loaded from `src`
/// in that position instead.
1598///
1599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_epi64)
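///
/// A minimal usage sketch with illustrative values; lanes whose `mask` element
/// has its highest bit clear fall back to `src`:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let arr: [i64; 4] = [10, 11, 12, 13];
/// let idx = _mm256_setr_epi64x(0, 1, 2, 3);
/// let src = _mm256_set1_epi64x(-7);
/// // gather only lanes 0 and 2; lanes 1 and 3 keep the value from `src`
/// let mask = _mm256_setr_epi64x(-1, 0, -1, 0);
/// let r = _mm256_mask_i64gather_epi64::<8>(src, arr.as_ptr(), idx, mask);
/// let mut out = [0i64; 4];
/// _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
/// assert_eq!(out, [10, -7, 12, -7]);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```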
1600#[inline]
1601#[target_feature(enable = "avx2")]
1602#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
1603#[rustc_legacy_const_generics(4)]
1604#[stable(feature = "simd_x86", since = "1.27.0")]
1605pub unsafe fn _mm256_mask_i64gather_epi64<const SCALE: i32>(
1606 src: __m256i,
1607 slice: *const i64,
1608 offsets: __m256i,
1609 mask: __m256i,
1610) -> __m256i {
1611 static_assert_imm8_scale!(SCALE);
1612 let src: i64x4 = src.as_i64x4();
1613 let mask: i64x4 = mask.as_i64x4();
1614 let offsets: i64x4 = offsets.as_i64x4();
1615 let slice: *const i8 = slice as *const i8;
1616 let r: i64x4 = vpgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
1618}
1619
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
1623///
1624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_pd)
1625#[inline]
1626#[target_feature(enable = "avx2")]
1627#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
1628#[rustc_legacy_const_generics(2)]
1629#[stable(feature = "simd_x86", since = "1.27.0")]
1630pub unsafe fn _mm_i64gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
1631 static_assert_imm8_scale!(SCALE);
1632 let zero: __m128d = _mm_setzero_pd();
1633 let neg_one: __m128d = _mm_set1_pd(-1.0);
1634 let slice: *const i8 = slice as *const i8;
1635 let offsets: i64x2 = offsets.as_i64x2();
    pgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
1637}
1638
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value is loaded from `src`
/// in that position instead.
1643///
1644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_pd)
1645#[inline]
1646#[target_feature(enable = "avx2")]
1647#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
1648#[rustc_legacy_const_generics(4)]
1649#[stable(feature = "simd_x86", since = "1.27.0")]
1650pub unsafe fn _mm_mask_i64gather_pd<const SCALE: i32>(
1651 src: __m128d,
1652 slice: *const f64,
1653 offsets: __m128i,
1654 mask: __m128d,
1655) -> __m128d {
1656 static_assert_imm8_scale!(SCALE);
1657 let slice: *const i8 = slice as *const i8;
1658 let offsets: i64x2 = offsets.as_i64x2();
1659 pgatherqpd(src, slice, offsets, mask, SCALE as i8)
1660}
1661
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
1665///
1666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_pd)
1667#[inline]
1668#[target_feature(enable = "avx2")]
1669#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
1670#[rustc_legacy_const_generics(2)]
1671#[stable(feature = "simd_x86", since = "1.27.0")]
1672pub unsafe fn _mm256_i64gather_pd<const SCALE: i32>(
1673 slice: *const f64,
1674 offsets: __m256i,
1675) -> __m256d {
1676 static_assert_imm8_scale!(SCALE);
1677 let zero: __m256d = _mm256_setzero_pd();
1678 let neg_one: __m256d = _mm256_set1_pd(-1.0);
1679 let slice: *const i8 = slice as *const i8;
1680 let offsets: i64x4 = offsets.as_i64x4();
    vpgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
1682}
1683
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value is loaded from `src`
/// in that position instead.
1688///
1689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_pd)
1690#[inline]
1691#[target_feature(enable = "avx2")]
1692#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
1693#[rustc_legacy_const_generics(4)]
1694#[stable(feature = "simd_x86", since = "1.27.0")]
1695pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>(
1696 src: __m256d,
1697 slice: *const f64,
1698 offsets: __m256i,
1699 mask: __m256d,
1700) -> __m256d {
1701 static_assert_imm8_scale!(SCALE);
1702 let slice: *const i8 = slice as *const i8;
1703 let offsets: i64x4 = offsets.as_i64x4();
1704 vpgatherqpd(src, slice, offsets, mask, SCALE as i8)
1705}
1706
/// Copies `a` to `dst`, then inserts 128 bits (of integer data) from `b` at the
/// location specified by `IMM1`.
1709///
1710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti128_si256)
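///
/// A minimal usage sketch with illustrative values (`IMM1 = 1` replaces the
/// upper 128 bits):
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi32(1);
/// let b = _mm_set1_epi32(9);
/// // replace the upper 128-bit half of `a` with `b`
/// let r = _mm256_inserti128_si256::<1>(a, b);
/// let e = _mm256_setr_epi32(1, 1, 1, 1, 9, 9, 9, 9);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi32(r, e)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```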
1711#[inline]
1712#[target_feature(enable = "avx2")]
1713#[cfg_attr(
1714 all(test, not(target_os = "windows")),
1715 assert_instr(vinsertf128, IMM1 = 1)
1716)]
1717#[rustc_legacy_const_generics(2)]
1718#[stable(feature = "simd_x86", since = "1.27.0")]
1719pub unsafe fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
1720 static_assert_uimm_bits!(IMM1, 1);
1721 let a: i64x4 = a.as_i64x4();
1722 let b: i64x4 = _mm256_castsi128_si256(b).as_i64x4();
1723 let dst: i64x4 = simd_shuffle!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]);
    transmute(dst)
1725}
1726
1727/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally adds adjacent pairs
1729/// of intermediate 32-bit integers.
1730///
1731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd_epi16)
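///
/// A minimal usage sketch with illustrative values; each 32-bit result is the
/// sum of two adjacent 16-bit products:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(2);
/// let b = _mm256_set1_epi16(3);
/// // every 32-bit lane holds 2 * 3 + 2 * 3 = 12
/// let r = _mm256_madd_epi16(a, b);
/// assert_eq!(
///     _mm256_movemask_epi8(_mm256_cmpeq_epi32(r, _mm256_set1_epi32(12))),
///     !0
/// );
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```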
1732#[inline]
1733#[target_feature(enable = "avx2")]
1734#[cfg_attr(test, assert_instr(vpmaddwd))]
1735#[stable(feature = "simd_x86", since = "1.27.0")]
1736pub unsafe fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmaddwd(a.as_i16x16(), b.as_i16x16()))
1738}
1739
1740/// Vertically multiplies each unsigned 8-bit integer from `a` with the
1741/// corresponding signed 8-bit integer from `b`, producing intermediate
/// signed 16-bit integers. Horizontally adds adjacent pairs of intermediate
/// signed 16-bit integers.
1744///
1745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maddubs_epi16)
1746#[inline]
1747#[target_feature(enable = "avx2")]
1748#[cfg_attr(test, assert_instr(vpmaddubsw))]
1749#[stable(feature = "simd_x86", since = "1.27.0")]
1750pub unsafe fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32()))
1752}
1753
1754/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
1755/// (elements are zeroed out when the highest bit is not set in the
1756/// corresponding element).
1757///
1758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_epi32)
1759#[inline]
1760#[target_feature(enable = "avx2")]
1761#[cfg_attr(test, assert_instr(vpmaskmovd))]
1762#[stable(feature = "simd_x86", since = "1.27.0")]
1763pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
    transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4()))
1765}
1766
1767/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
1768/// (elements are zeroed out when the highest bit is not set in the
1769/// corresponding element).
1770///
1771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_epi32)
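///
/// A minimal usage sketch with illustrative values; lanes whose mask element
/// has its highest bit clear read as zero:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let data: [i32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
/// let mask = _mm256_setr_epi32(-1, 0, -1, 0, -1, 0, -1, 0);
/// // only the even lanes are loaded; the odd lanes become zero
/// let r = _mm256_maskload_epi32(data.as_ptr(), mask);
/// let e = _mm256_setr_epi32(1, 0, 3, 0, 5, 0, 7, 0);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi32(r, e)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```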
1772#[inline]
1773#[target_feature(enable = "avx2")]
1774#[cfg_attr(test, assert_instr(vpmaskmovd))]
1775#[stable(feature = "simd_x86", since = "1.27.0")]
1776pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
    transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8()))
1778}
1779
1780/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask`
1781/// (elements are zeroed out when the highest bit is not set in the
1782/// corresponding element).
1783///
1784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_epi64)
1785#[inline]
1786#[target_feature(enable = "avx2")]
1787#[cfg_attr(test, assert_instr(vpmaskmovq))]
1788#[stable(feature = "simd_x86", since = "1.27.0")]
1789pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
    transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2()))
1791}
1792
1793/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask`
1794/// (elements are zeroed out when the highest bit is not set in the
1795/// corresponding element).
1796///
1797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_epi64)
1798#[inline]
1799#[target_feature(enable = "avx2")]
1800#[cfg_attr(test, assert_instr(vpmaskmovq))]
1801#[stable(feature = "simd_x86", since = "1.27.0")]
1802pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
    transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4()))
1804}
1805
1806/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr`
1807/// using `mask` (elements are not stored when the highest bit is not set
1808/// in the corresponding element).
1809///
1810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_epi32)
1811#[inline]
1812#[target_feature(enable = "avx2")]
1813#[cfg_attr(test, assert_instr(vpmaskmovd))]
1814#[stable(feature = "simd_x86", since = "1.27.0")]
1815pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
    maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4())
1817}
1818
1819/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr`
1820/// using `mask` (elements are not stored when the highest bit is not set
1821/// in the corresponding element).
1822///
1823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_epi32)
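///
/// A minimal usage sketch with illustrative values; only lanes whose mask
/// element has its highest bit set are written:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let mut out = [0i32; 8];
/// let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
/// let mask = _mm256_setr_epi32(-1, 0, -1, 0, -1, 0, -1, 0);
/// // only the even lanes are stored; the odd elements of `out` are untouched
/// _mm256_maskstore_epi32(out.as_mut_ptr(), mask, a);
/// assert_eq!(out, [1, 0, 3, 0, 5, 0, 7, 0]);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```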
1824#[inline]
1825#[target_feature(enable = "avx2")]
1826#[cfg_attr(test, assert_instr(vpmaskmovd))]
1827#[stable(feature = "simd_x86", since = "1.27.0")]
1828pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
    maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8())
1830}
1831
1832/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr`
1833/// using `mask` (elements are not stored when the highest bit is not set
1834/// in the corresponding element).
1835///
1836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_epi64)
1837#[inline]
1838#[target_feature(enable = "avx2")]
1839#[cfg_attr(test, assert_instr(vpmaskmovq))]
1840#[stable(feature = "simd_x86", since = "1.27.0")]
1841pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
    maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2())
1843}
1844
1845/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr`
1846/// using `mask` (elements are not stored when the highest bit is not set
1847/// in the corresponding element).
1848///
1849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_epi64)
1850#[inline]
1851#[target_feature(enable = "avx2")]
1852#[cfg_attr(test, assert_instr(vpmaskmovq))]
1853#[stable(feature = "simd_x86", since = "1.27.0")]
1854pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
    maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4())
1856}
1857
1858/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
1859/// maximum values.
1860///
1861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi16)
1862#[inline]
1863#[target_feature(enable = "avx2")]
1864#[cfg_attr(test, assert_instr(vpmaxsw))]
1865#[stable(feature = "simd_x86", since = "1.27.0")]
1866pub unsafe fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
1867 let a: i16x16 = a.as_i16x16();
1868 let b: i16x16 = b.as_i16x16();
    transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
1870}
1871
1872/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
1873/// maximum values.
1874///
1875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi32)
1876#[inline]
1877#[target_feature(enable = "avx2")]
1878#[cfg_attr(test, assert_instr(vpmaxsd))]
1879#[stable(feature = "simd_x86", since = "1.27.0")]
1880pub unsafe fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
1881 let a: i32x8 = a.as_i32x8();
1882 let b: i32x8 = b.as_i32x8();
    transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
1884}
1885
1886/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
1887/// maximum values.
1888///
1889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi8)
1890#[inline]
1891#[target_feature(enable = "avx2")]
1892#[cfg_attr(test, assert_instr(vpmaxsb))]
1893#[stable(feature = "simd_x86", since = "1.27.0")]
1894pub unsafe fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
1895 let a: i8x32 = a.as_i8x32();
1896 let b: i8x32 = b.as_i8x32();
    transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
1898}
1899
1900/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
1901/// the packed maximum values.
1902///
1903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu16)
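///
/// A minimal usage sketch with illustrative values; note the unsigned
/// interpretation, where `-1` reads as `0xFFFF`:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(-1); // 0xFFFF as an unsigned value
/// let b = _mm256_set1_epi16(5);
/// // unsigned comparison: 0xFFFF is the larger value in every lane
/// let r = _mm256_max_epu16(a, b);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi16(r, a)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```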
1904#[inline]
1905#[target_feature(enable = "avx2")]
1906#[cfg_attr(test, assert_instr(vpmaxuw))]
1907#[stable(feature = "simd_x86", since = "1.27.0")]
1908pub unsafe fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
1909 let a: u16x16 = a.as_u16x16();
1910 let b: u16x16 = b.as_u16x16();
    transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
1912}
1913
1914/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
1915/// the packed maximum values.
1916///
1917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu32)
1918#[inline]
1919#[target_feature(enable = "avx2")]
1920#[cfg_attr(test, assert_instr(vpmaxud))]
1921#[stable(feature = "simd_x86", since = "1.27.0")]
1922pub unsafe fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
1923 let a: u32x8 = a.as_u32x8();
1924 let b: u32x8 = b.as_u32x8();
    transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
1926}
1927
1928/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
1929/// the packed maximum values.
1930///
1931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu8)
1932#[inline]
1933#[target_feature(enable = "avx2")]
1934#[cfg_attr(test, assert_instr(vpmaxub))]
1935#[stable(feature = "simd_x86", since = "1.27.0")]
1936pub unsafe fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
1937 let a: u8x32 = a.as_u8x32();
1938 let b: u8x32 = b.as_u8x32();
    transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
1940}
1941
1942/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
1943/// minimum values.
1944///
1945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi16)
1946#[inline]
1947#[target_feature(enable = "avx2")]
1948#[cfg_attr(test, assert_instr(vpminsw))]
1949#[stable(feature = "simd_x86", since = "1.27.0")]
1950pub unsafe fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
1951 let a: i16x16 = a.as_i16x16();
1952 let b: i16x16 = b.as_i16x16();
    transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
1954}
1955
1956/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
1957/// minimum values.
1958///
1959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi32)
1960#[inline]
1961#[target_feature(enable = "avx2")]
1962#[cfg_attr(test, assert_instr(vpminsd))]
1963#[stable(feature = "simd_x86", since = "1.27.0")]
1964pub unsafe fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
1965 let a: i32x8 = a.as_i32x8();
1966 let b: i32x8 = b.as_i32x8();
    transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
1968}
1969
1970/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
1971/// minimum values.
1972///
1973/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi8)
1974#[inline]
1975#[target_feature(enable = "avx2")]
1976#[cfg_attr(test, assert_instr(vpminsb))]
1977#[stable(feature = "simd_x86", since = "1.27.0")]
1978pub unsafe fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
1979 let a: i8x32 = a.as_i8x32();
1980 let b: i8x32 = b.as_i8x32();
    transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
1982}
1983
1984/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
1985/// the packed minimum values.
1986///
1987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu16)
1988#[inline]
1989#[target_feature(enable = "avx2")]
1990#[cfg_attr(test, assert_instr(vpminuw))]
1991#[stable(feature = "simd_x86", since = "1.27.0")]
1992pub unsafe fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
1993 let a: u16x16 = a.as_u16x16();
1994 let b: u16x16 = b.as_u16x16();
    transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
1996}
1997
1998/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
1999/// the packed minimum values.
2000///
2001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu32)
2002#[inline]
2003#[target_feature(enable = "avx2")]
2004#[cfg_attr(test, assert_instr(vpminud))]
2005#[stable(feature = "simd_x86", since = "1.27.0")]
2006pub unsafe fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
2007 let a: u32x8 = a.as_u32x8();
2008 let b: u32x8 = b.as_u32x8();
    transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
2010}
2011
2012/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
2013/// the packed minimum values.
2014///
2015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu8)
2016#[inline]
2017#[target_feature(enable = "avx2")]
2018#[cfg_attr(test, assert_instr(vpminub))]
2019#[stable(feature = "simd_x86", since = "1.27.0")]
2020pub unsafe fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
2021 let a: u8x32 = a.as_u8x32();
2022 let b: u8x32 = b.as_u8x32();
    transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
2024}
2025
/// Creates a mask from the most significant bit of each 8-bit element in `a`,
/// and returns the result.
2028///
2029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_epi8)
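///
/// A minimal usage sketch with illustrative values:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // every byte has its most significant bit set, so all 32 mask bits are set
/// assert_eq!(_mm256_movemask_epi8(_mm256_set1_epi8(-1)), !0);
/// // no byte has its most significant bit set
/// assert_eq!(_mm256_movemask_epi8(_mm256_set1_epi8(1)), 0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```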
2030#[inline]
2031#[target_feature(enable = "avx2")]
2032#[cfg_attr(test, assert_instr(vpmovmskb))]
2033#[stable(feature = "simd_x86", since = "1.27.0")]
2034pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 {
2035 let z: i8x32 = i8x32::splat(0);
    let m: i8x32 = simd_lt(a.as_i8x32(), z);
2037 simd_bitmask::<_, u32>(m) as i32
2038}
2039
2040/// Computes the sum of absolute differences (SADs) of quadruplets of unsigned
2041/// 8-bit integers in `a` compared to those in `b`, and stores the 16-bit
2042/// results in dst. Eight SADs are performed for each 128-bit lane using one
2043/// quadruplet from `b` and eight quadruplets from `a`. One quadruplet is
/// selected from `b` starting at the offset specified in `imm8`. Eight
2045/// quadruplets are formed from sequential 8-bit integers selected from `a`
2046/// starting at the offset specified in `imm8`.
2047///
2048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mpsadbw_epu8)
2049#[inline]
2050#[target_feature(enable = "avx2")]
2051#[cfg_attr(test, assert_instr(vmpsadbw, IMM8 = 0))]
2052#[rustc_legacy_const_generics(2)]
2053#[stable(feature = "simd_x86", since = "1.27.0")]
2054pub unsafe fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2055 static_assert_uimm_bits!(IMM8, 8);
    transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8))
2057}
2058
2059/// Multiplies the low 32-bit integers from each packed 64-bit element in
2060/// `a` and `b`
2061///
2062/// Returns the 64-bit results.
2063///
2064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_epi32)
2065#[inline]
2066#[target_feature(enable = "avx2")]
2067#[cfg_attr(test, assert_instr(vpmuldq))]
2068#[stable(feature = "simd_x86", since = "1.27.0")]
2069pub unsafe fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
2070 let a: i64x4 = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
2071 let b: i64x4 = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
    transmute(simd_mul(a, b))
2073}
2074
2075/// Multiplies the low unsigned 32-bit integers from each packed 64-bit
2076/// element in `a` and `b`
2077///
2078/// Returns the unsigned 64-bit results.
2079///
2080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_epu32)
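///
/// A minimal usage sketch with illustrative values; the upper 32 bits of each
/// 64-bit element are ignored:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // the low 32 bits are 5; the bit set above them takes no part in the multiply
/// let a = _mm256_set1_epi64x(0x1_0000_0005);
/// let b = _mm256_set1_epi64x(7);
/// let r = _mm256_mul_epu32(a, b);
/// let mut out = [0i64; 4];
/// _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
/// assert_eq!(out, [35, 35, 35, 35]);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```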
2081#[inline]
2082#[target_feature(enable = "avx2")]
2083#[cfg_attr(test, assert_instr(vpmuludq))]
2084#[stable(feature = "simd_x86", since = "1.27.0")]
2085pub unsafe fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
2086 let a: u64x4 = a.as_u64x4();
2087 let b: u64x4 = b.as_u64x4();
2088 let mask: u64x4 = u64x4::splat(u32::MAX.into());
    transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
2090}
2091
2092/// Multiplies the packed 16-bit integers in `a` and `b`, producing
2093/// intermediate 32-bit integers and returning the high 16 bits of the
2094/// intermediate integers.
2095///
2096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhi_epi16)
2097#[inline]
2098#[target_feature(enable = "avx2")]
2099#[cfg_attr(test, assert_instr(vpmulhw))]
2100#[stable(feature = "simd_x86", since = "1.27.0")]
2101pub unsafe fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
2102 let a: i32x16 = simd_cast::<_, i32x16>(a.as_i16x16());
2103 let b: i32x16 = simd_cast::<_, i32x16>(b.as_i16x16());
    let r: i32x16 = simd_shr(simd_mul(a, b), i32x16::splat(16));
    transmute(simd_cast::<i32x16, i16x16>(r))
2106}
2107
2108/// Multiplies the packed unsigned 16-bit integers in `a` and `b`, producing
2109/// intermediate 32-bit integers and returning the high 16 bits of the
2110/// intermediate integers.
2111///
2112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhi_epu16)
2113#[inline]
2114#[target_feature(enable = "avx2")]
2115#[cfg_attr(test, assert_instr(vpmulhuw))]
2116#[stable(feature = "simd_x86", since = "1.27.0")]
2117pub unsafe fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
2118 let a: u32x16 = simd_cast::<_, u32x16>(a.as_u16x16());
2119 let b: u32x16 = simd_cast::<_, u32x16>(b.as_u16x16());
    let r: u32x16 = simd_shr(simd_mul(a, b), u32x16::splat(16));
    transmute(simd_cast::<u32x16, u16x16>(r))
2122}
2123
2124/// Multiplies the packed 16-bit integers in `a` and `b`, producing
2125/// intermediate 32-bit integers, and returns the low 16 bits of the
2126/// intermediate integers
2127///
2128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi16)
2129#[inline]
2130#[target_feature(enable = "avx2")]
2131#[cfg_attr(test, assert_instr(vpmullw))]
2132#[stable(feature = "simd_x86", since = "1.27.0")]
2133pub unsafe fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_mul(a.as_i16x16(), b.as_i16x16()))
2135}
2136
2137/// Multiplies the packed 32-bit integers in `a` and `b`, producing
2138/// intermediate 64-bit integers, and returns the low 32 bits of the
2139/// intermediate integers
2140///
2141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi32)
2142#[inline]
2143#[target_feature(enable = "avx2")]
2144#[cfg_attr(test, assert_instr(vpmulld))]
2145#[stable(feature = "simd_x86", since = "1.27.0")]
2146pub unsafe fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_mul(a.as_i32x8(), b.as_i32x8()))
2148}
2149
2150/// Multiplies packed 16-bit integers in `a` and `b`, producing
2151/// intermediate signed 32-bit integers. Truncate each intermediate
2152/// integer to the 18 most significant bits, round by adding 1, and
2153/// return bits `[16:1]`.
2154///
2155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhrs_epi16)
2156#[inline]
2157#[target_feature(enable = "avx2")]
2158#[cfg_attr(test, assert_instr(vpmulhrsw))]
2159#[stable(feature = "simd_x86", since = "1.27.0")]
2160pub unsafe fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16()))
2162}
2163
2164/// Computes the bitwise OR of 256 bits (representing integer data) in `a`
2165/// and `b`
2166///
2167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_si256)
2168#[inline]
2169#[target_feature(enable = "avx2")]
2170#[cfg_attr(test, assert_instr(vorps))]
2171#[stable(feature = "simd_x86", since = "1.27.0")]
2172pub unsafe fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_or(a.as_i32x8(), b.as_i32x8()))
2174}
2175
2176/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
2177/// using signed saturation
2178///
2179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi16)
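///
/// A minimal usage sketch with illustrative values; out-of-range inputs
/// saturate, and each 128-bit lane packs `a` before `b`:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(300); // saturates to 127
/// let b = _mm256_set1_epi16(-300); // saturates to -128
/// let r = _mm256_packs_epi16(a, b);
/// let mut out = [0i8; 32];
/// _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
/// // in each 128-bit lane: eight bytes from `a`, then eight bytes from `b`
/// assert_eq!(out[0], 127);
/// assert_eq!(out[8], -128);
/// assert_eq!(out[16], 127);
/// assert_eq!(out[24], -128);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```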
2180#[inline]
2181#[target_feature(enable = "avx2")]
2182#[cfg_attr(test, assert_instr(vpacksswb))]
2183#[stable(feature = "simd_x86", since = "1.27.0")]
2184pub unsafe fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(packsswb(a.as_i16x16(), b.as_i16x16()))
2186}
2187
2188/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
2189/// using signed saturation
2190///
2191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi32)
2192#[inline]
2193#[target_feature(enable = "avx2")]
2194#[cfg_attr(test, assert_instr(vpackssdw))]
2195#[stable(feature = "simd_x86", since = "1.27.0")]
2196pub unsafe fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(packssdw(a.as_i32x8(), b.as_i32x8()))
2198}
2199
2200/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
2201/// using unsigned saturation
2202///
2203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi16)
2204#[inline]
2205#[target_feature(enable = "avx2")]
2206#[cfg_attr(test, assert_instr(vpackuswb))]
2207#[stable(feature = "simd_x86", since = "1.27.0")]
2208pub unsafe fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(packuswb(a.as_i16x16(), b.as_i16x16()))
2210}
2211
2212/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
2213/// using unsigned saturation
2214///
2215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi32)
2216#[inline]
2217#[target_feature(enable = "avx2")]
2218#[cfg_attr(test, assert_instr(vpackusdw))]
2219#[stable(feature = "simd_x86", since = "1.27.0")]
2220pub unsafe fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(packusdw(a.as_i32x8(), b.as_i32x8()))
2222}
2223
2224/// Permutes packed 32-bit integers from `a` according to the content of `b`.
2225///
2226/// The last 3 bits of each integer of `b` are used as addresses into the 8
2227/// integers of `a`.
2228///
2229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_epi32)
2230#[inline]
2231#[target_feature(enable = "avx2")]
2232#[cfg_attr(test, assert_instr(vpermps))]
2233#[stable(feature = "simd_x86", since = "1.27.0")]
2234pub unsafe fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(permd(a.as_u32x8(), b.as_u32x8()))
2236}
2237
2238/// Permutes 64-bit integers from `a` using control mask `imm8`.
2239///
2240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute4x64_epi64)
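///
/// A minimal usage sketch with illustrative values; each pair of bits in
/// `IMM8` selects one source element, starting from the lowest pair:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi64x(10, 20, 30, 40);
/// // selectors, from the low bits up: 2, 3, 0, 0
/// let r = _mm256_permute4x64_epi64::<0b00_00_11_10>(a);
/// let mut out = [0i64; 4];
/// _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
/// assert_eq!(out, [30, 40, 10, 10]);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```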
2241#[inline]
2242#[target_feature(enable = "avx2")]
2243#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 9))]
2244#[rustc_legacy_const_generics(1)]
2245#[stable(feature = "simd_x86", since = "1.27.0")]
2246pub unsafe fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2247 static_assert_uimm_bits!(IMM8, 8);
2248 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
2249 let r: i64x4 = simd_shuffle!(
2250 a.as_i64x4(),
2251 zero,
2252 [
2253 IMM8 as u32 & 0b11,
2254 (IMM8 as u32 >> 2) & 0b11,
2255 (IMM8 as u32 >> 4) & 0b11,
2256 (IMM8 as u32 >> 6) & 0b11,
2257 ],
2258 );
    transmute(r)
2260}
2261
2262/// Shuffles 128-bits of integer data selected by `imm8` from `a` and `b`.
2263///
2264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2x128_si256)
2265#[inline]
2266#[target_feature(enable = "avx2")]
2267#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 9))]
2268#[rustc_legacy_const_generics(2)]
2269#[stable(feature = "simd_x86", since = "1.27.0")]
2270pub unsafe fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2271 static_assert_uimm_bits!(IMM8, 8);
    transmute(vperm2i128(a.as_i64x4(), b.as_i64x4(), IMM8 as i8))
2273}
2274
2275/// Shuffles 64-bit floating-point elements in `a` across lanes using the
2276/// control in `imm8`.
2277///
2278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute4x64_pd)
2279#[inline]
2280#[target_feature(enable = "avx2")]
2281#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 1))]
2282#[rustc_legacy_const_generics(1)]
2283#[stable(feature = "simd_x86", since = "1.27.0")]
2284pub unsafe fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
2285 static_assert_uimm_bits!(IMM8, 8);
2286 simd_shuffle!(
2287 a,
2288 _mm256_undefined_pd(),
2289 [
2290 IMM8 as u32 & 0b11,
2291 (IMM8 as u32 >> 2) & 0b11,
2292 (IMM8 as u32 >> 4) & 0b11,
2293 (IMM8 as u32 >> 6) & 0b11,
2294 ],
2295 )
2296}
2297
2298/// Shuffles eight 32-bit floating-point elements in `a` across lanes using
2299/// the corresponding 32-bit integer index in `idx`.
2300///
2301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_ps)
2302#[inline]
2303#[target_feature(enable = "avx2")]
2304#[cfg_attr(test, assert_instr(vpermps))]
2305#[stable(feature = "simd_x86", since = "1.27.0")]
2306pub unsafe fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 {
    permps(a, idx.as_i32x8())
2308}
2309
2310/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
2311/// and `b`, then horizontally sum each consecutive 8 differences to
/// produce four unsigned 16-bit integers, and pack these unsigned 16-bit
/// integers in the low 16 bits of the four 64-bit elements of the return value.
2314///
2315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sad_epu8)
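///
/// A minimal usage sketch with illustrative values:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi8(3);
/// let b = _mm256_set1_epi8(1);
/// // each group of eight bytes contributes 8 * |3 - 1| = 16
/// let r = _mm256_sad_epu8(a, b);
/// let mut out = [0i64; 4];
/// _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
/// assert_eq!(out, [16, 16, 16, 16]);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```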
2316#[inline]
2317#[target_feature(enable = "avx2")]
2318#[cfg_attr(test, assert_instr(vpsadbw))]
2319#[stable(feature = "simd_x86", since = "1.27.0")]
2320pub unsafe fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i {
    transmute(psadbw(a.as_u8x32(), b.as_u8x32()))
2322}
2323
2324/// Shuffles bytes from `a` according to the content of `b`.
2325///
2326/// For each of the 128-bit low and high halves of the vectors, the last
2327/// 4 bits of each byte of `b` are used as addresses into the respective
2328/// low or high 16 bytes of `a`. That is, the halves are shuffled separately.
2329///
/// In addition, if the most significant bit of a byte of `b` is set, the
2331/// respective destination byte is set to 0.
2332///
2333/// Picturing `a` and `b` as `[u8; 32]`, `_mm256_shuffle_epi8` is logically
2334/// equivalent to:
2335///
2336/// ```
2337/// fn mm256_shuffle_epi8(a: [u8; 32], b: [u8; 32]) -> [u8; 32] {
2338/// let mut r = [0; 32];
2339/// for i in 0..16 {
2340/// // if the most significant bit of b is set,
2341/// // then the destination byte is set to 0.
2342/// if b[i] & 0x80 == 0u8 {
2343/// r[i] = a[(b[i] % 16) as usize];
2344/// }
2345/// if b[i + 16] & 0x80 == 0u8 {
2346/// r[i + 16] = a[(b[i + 16] % 16 + 16) as usize];
2347/// }
2348/// }
2349/// r
2350/// }
2351/// ```
2352///
2353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_epi8)
2354#[inline]
2355#[target_feature(enable = "avx2")]
2356#[cfg_attr(test, assert_instr(vpshufb))]
2357#[stable(feature = "simd_x86", since = "1.27.0")]
2358pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(pshufb(a.as_u8x32(), b.as_u8x32()))
2360}
2361
2362/// Shuffles 32-bit integers in 128-bit lanes of `a` using the control in
2363/// `imm8`.
2364///
2365/// ```rust
2366/// #[cfg(target_arch = "x86")]
2367/// use std::arch::x86::*;
2368/// #[cfg(target_arch = "x86_64")]
2369/// use std::arch::x86_64::*;
2370///
2371/// # fn main() {
2372/// # if is_x86_feature_detected!("avx2") {
2373/// # #[target_feature(enable = "avx2")]
2374/// # unsafe fn worker() {
2375/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
2376///
2377/// let c1 = _mm256_shuffle_epi32(a, 0b00_11_10_01);
2378/// let c2 = _mm256_shuffle_epi32(a, 0b01_00_10_11);
2379///
2380/// let expected1 = _mm256_setr_epi32(1, 2, 3, 0, 5, 6, 7, 4);
2381/// let expected2 = _mm256_setr_epi32(3, 2, 0, 1, 7, 6, 4, 5);
2382///
2383/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c1, expected1)), !0);
2384/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c2, expected2)), !0);
2385/// # }
2386/// # unsafe { worker(); }
2387/// # }
2388/// # }
2389/// ```
2390///
2391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_epi32)
2392#[inline]
2393#[target_feature(enable = "avx2")]
2394#[cfg_attr(test, assert_instr(vshufps, MASK = 9))]
2395#[rustc_legacy_const_generics(1)]
2396#[stable(feature = "simd_x86", since = "1.27.0")]
2397pub unsafe fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
2398 static_assert_uimm_bits!(MASK, 8);
2399 let r: i32x8 = simd_shuffle!(
2400 a.as_i32x8(),
2401 a.as_i32x8(),
2402 [
2403 MASK as u32 & 0b11,
2404 (MASK as u32 >> 2) & 0b11,
2405 (MASK as u32 >> 4) & 0b11,
2406 (MASK as u32 >> 6) & 0b11,
2407 (MASK as u32 & 0b11) + 4,
2408 ((MASK as u32 >> 2) & 0b11) + 4,
2409 ((MASK as u32 >> 4) & 0b11) + 4,
2410 ((MASK as u32 >> 6) & 0b11) + 4,
2411 ],
2412 );
    transmute(r)
2414}
2415
2416/// Shuffles 16-bit integers in the high 64 bits of 128-bit lanes of `a` using
2417/// the control in `imm8`. The low 64 bits of 128-bit lanes of `a` are copied
2418/// to the output.
2419///
2420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shufflehi_epi16)
2421#[inline]
2422#[target_feature(enable = "avx2")]
2423#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 9))]
2424#[rustc_legacy_const_generics(1)]
2425#[stable(feature = "simd_x86", since = "1.27.0")]
2426pub unsafe fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2427 static_assert_uimm_bits!(IMM8, 8);
2428 let a = a.as_i16x16();
2429 let r: i16x16 = simd_shuffle!(
2430 a,
2431 a,
2432 [
2433 0,
2434 1,
2435 2,
2436 3,
2437 4 + (IMM8 as u32 & 0b11),
2438 4 + ((IMM8 as u32 >> 2) & 0b11),
2439 4 + ((IMM8 as u32 >> 4) & 0b11),
2440 4 + ((IMM8 as u32 >> 6) & 0b11),
2441 8,
2442 9,
2443 10,
2444 11,
2445 12 + (IMM8 as u32 & 0b11),
2446 12 + ((IMM8 as u32 >> 2) & 0b11),
2447 12 + ((IMM8 as u32 >> 4) & 0b11),
2448 12 + ((IMM8 as u32 >> 6) & 0b11),
2449 ],
2450 );
2451 transmute(r)
2452}
2453
2454/// Shuffles 16-bit integers in the low 64 bits of 128-bit lanes of `a` using
2455/// the control in `imm8`. The high 64 bits of 128-bit lanes of `a` are copied
2456/// to the output.
2457///
2458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shufflelo_epi16)
2459#[inline]
2460#[target_feature(enable = "avx2")]
2461#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))]
2462#[rustc_legacy_const_generics(1)]
2463#[stable(feature = "simd_x86", since = "1.27.0")]
2464pub unsafe fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2465 static_assert_uimm_bits!(IMM8, 8);
2466 let a = a.as_i16x16();
2467 let r: i16x16 = simd_shuffle!(
2468 a,
2469 a,
2470 [
2471 0 + (IMM8 as u32 & 0b11),
2472 0 + ((IMM8 as u32 >> 2) & 0b11),
2473 0 + ((IMM8 as u32 >> 4) & 0b11),
2474 0 + ((IMM8 as u32 >> 6) & 0b11),
2475 4,
2476 5,
2477 6,
2478 7,
2479 8 + (IMM8 as u32 & 0b11),
2480 8 + ((IMM8 as u32 >> 2) & 0b11),
2481 8 + ((IMM8 as u32 >> 4) & 0b11),
2482 8 + ((IMM8 as u32 >> 6) & 0b11),
2483 12,
2484 13,
2485 14,
2486 15,
2487 ],
2488 );
2489 transmute(r)
2490}
2491
2492/// Negates packed 16-bit integers in `a` when the corresponding signed
2493/// 16-bit integer in `b` is negative, and returns the results.
2494/// Results are zeroed out when the corresponding element in `b` is zero.
2495///
2496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi16)
2497#[inline]
2498#[target_feature(enable = "avx2")]
2499#[cfg_attr(test, assert_instr(vpsignw))]
2500#[stable(feature = "simd_x86", since = "1.27.0")]
2501pub unsafe fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(psignw(a.as_i16x16(), b.as_i16x16()))
2503}
2504
2505/// Negates packed 32-bit integers in `a` when the corresponding signed
2506/// 32-bit integer in `b` is negative, and returns the results.
2507/// Results are zeroed out when the corresponding element in `b` is zero.
2508///
2509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi32)
2510#[inline]
2511#[target_feature(enable = "avx2")]
2512#[cfg_attr(test, assert_instr(vpsignd))]
2513#[stable(feature = "simd_x86", since = "1.27.0")]
2514pub unsafe fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(psignd(a.as_i32x8(), b.as_i32x8()))
2516}
2517
2518/// Negates packed 8-bit integers in `a` when the corresponding signed
2519/// 8-bit integer in `b` is negative, and returns the results.
2520/// Results are zeroed out when the corresponding element in `b` is zero.
2521///
2522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi8)
2523#[inline]
2524#[target_feature(enable = "avx2")]
2525#[cfg_attr(test, assert_instr(vpsignb))]
2526#[stable(feature = "simd_x86", since = "1.27.0")]
2527pub unsafe fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(psignb(a.as_i8x32(), b.as_i8x32()))
2529}
2530
2531/// Shifts packed 16-bit integers in `a` left by `count` while
2532/// shifting in zeros, and returns the result
2533///
2534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi16)
2535#[inline]
2536#[target_feature(enable = "avx2")]
2537#[cfg_attr(test, assert_instr(vpsllw))]
2538#[stable(feature = "simd_x86", since = "1.27.0")]
2539pub unsafe fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
    transmute(psllw(a.as_i16x16(), count.as_i16x8()))
2541}
2542
2543/// Shifts packed 32-bit integers in `a` left by `count` while
2544/// shifting in zeros, and returns the result
2545///
2546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi32)
2547#[inline]
2548#[target_feature(enable = "avx2")]
2549#[cfg_attr(test, assert_instr(vpslld))]
2550#[stable(feature = "simd_x86", since = "1.27.0")]
2551pub unsafe fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
    transmute(pslld(a.as_i32x8(), count.as_i32x4()))
2553}
2554
2555/// Shifts packed 64-bit integers in `a` left by `count` while
2556/// shifting in zeros, and returns the result
2557///
2558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi64)
2559#[inline]
2560#[target_feature(enable = "avx2")]
2561#[cfg_attr(test, assert_instr(vpsllq))]
2562#[stable(feature = "simd_x86", since = "1.27.0")]
2563pub unsafe fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
    transmute(psllq(a.as_i64x4(), count.as_i64x2()))
2565}
2566
2567/// Shifts packed 16-bit integers in `a` left by `IMM8` while
/// shifting in zeros, and returns the results.
2569///
2570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi16)
2571#[inline]
2572#[target_feature(enable = "avx2")]
2573#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 7))]
2574#[rustc_legacy_const_generics(1)]
2575#[stable(feature = "simd_x86", since = "1.27.0")]
2576pub unsafe fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2577 static_assert_uimm_bits!(IMM8, 8);
2578 if IMM8 >= 16 {
2579 _mm256_setzero_si256()
2580 } else {
        transmute(simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
2582 }
2583}
2584
2585/// Shifts packed 32-bit integers in `a` left by `IMM8` while
/// shifting in zeros, and returns the results.
2587///
2588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi32)
2589#[inline]
2590#[target_feature(enable = "avx2")]
2591#[cfg_attr(test, assert_instr(vpslld, IMM8 = 7))]
2592#[rustc_legacy_const_generics(1)]
2593#[stable(feature = "simd_x86", since = "1.27.0")]
2594pub unsafe fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2595 static_assert_uimm_bits!(IMM8, 8);
2596 if IMM8 >= 32 {
2597 _mm256_setzero_si256()
2598 } else {
        transmute(simd_shl(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
2600 }
2601}
2602
2603/// Shifts packed 64-bit integers in `a` left by `IMM8` while
/// shifting in zeros, and returns the results.
2605///
2606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi64)
2607#[inline]
2608#[target_feature(enable = "avx2")]
2609#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 7))]
2610#[rustc_legacy_const_generics(1)]
2611#[stable(feature = "simd_x86", since = "1.27.0")]
2612pub unsafe fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2613 static_assert_uimm_bits!(IMM8, 8);
2614 if IMM8 >= 64 {
2615 _mm256_setzero_si256()
2616 } else {
        transmute(simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
2618 }
2619}
2620
2621/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
2622///
2623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_si256)
2624#[inline]
2625#[target_feature(enable = "avx2")]
2626#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
2627#[rustc_legacy_const_generics(1)]
2628#[stable(feature = "simd_x86", since = "1.27.0")]
2629pub unsafe fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2630 static_assert_uimm_bits!(IMM8, 8);
2631 _mm256_bslli_epi128::<IMM8>(a)
2632}
2633
2634/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
2635///
2636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bslli_epi128)
2637#[inline]
2638#[target_feature(enable = "avx2")]
2639#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
2640#[rustc_legacy_const_generics(1)]
2641#[stable(feature = "simd_x86", since = "1.27.0")]
2642pub unsafe fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2643 static_assert_uimm_bits!(IMM8, 8);
2644 const fn mask(shift: i32, i: u32) -> u32 {
2645 let shift = shift as u32 & 0xff;
2646 if shift > 15 || i % 16 < shift {
2647 0
2648 } else {
2649 32 + (i - shift)
2650 }
2651 }
2652 let a = a.as_i8x32();
2653 let zero = _mm256_setzero_si256().as_i8x32();
2654 let r: i8x32 = simd_shuffle!(
2655 zero,
2656 a,
2657 [
2658 mask(IMM8, 0),
2659 mask(IMM8, 1),
2660 mask(IMM8, 2),
2661 mask(IMM8, 3),
2662 mask(IMM8, 4),
2663 mask(IMM8, 5),
2664 mask(IMM8, 6),
2665 mask(IMM8, 7),
2666 mask(IMM8, 8),
2667 mask(IMM8, 9),
2668 mask(IMM8, 10),
2669 mask(IMM8, 11),
2670 mask(IMM8, 12),
2671 mask(IMM8, 13),
2672 mask(IMM8, 14),
2673 mask(IMM8, 15),
2674 mask(IMM8, 16),
2675 mask(IMM8, 17),
2676 mask(IMM8, 18),
2677 mask(IMM8, 19),
2678 mask(IMM8, 20),
2679 mask(IMM8, 21),
2680 mask(IMM8, 22),
2681 mask(IMM8, 23),
2682 mask(IMM8, 24),
2683 mask(IMM8, 25),
2684 mask(IMM8, 26),
2685 mask(IMM8, 27),
2686 mask(IMM8, 28),
2687 mask(IMM8, 29),
2688 mask(IMM8, 30),
2689 mask(IMM8, 31),
2690 ],
2691 );
2692 transmute(r)
2693}
2694
2695/// Shifts packed 32-bit integers in `a` left by the amount
2696/// specified by the corresponding element in `count` while
2697/// shifting in zeros, and returns the result.
2698///
2699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi32)
2700#[inline]
2701#[target_feature(enable = "avx2")]
2702#[cfg_attr(test, assert_instr(vpsllvd))]
2703#[stable(feature = "simd_x86", since = "1.27.0")]
2704pub unsafe fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllvd(a.as_i32x4(), count.as_i32x4()))
2706}
2707
2708/// Shifts packed 32-bit integers in `a` left by the amount
2709/// specified by the corresponding element in `count` while
2710/// shifting in zeros, and returns the result.
2711///
2712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi32)
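///
/// A minimal usage sketch with illustrative values; each lane uses its own
/// shift count:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi32(1);
/// let counts = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// let r = _mm256_sllv_epi32(a, counts);
/// let e = _mm256_setr_epi32(1, 2, 4, 8, 16, 32, 64, 128);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi32(r, e)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```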
2713#[inline]
2714#[target_feature(enable = "avx2")]
2715#[cfg_attr(test, assert_instr(vpsllvd))]
2716#[stable(feature = "simd_x86", since = "1.27.0")]
2717pub unsafe fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
    transmute(psllvd256(a.as_i32x8(), count.as_i32x8()))
2719}
2720
2721/// Shifts packed 64-bit integers in `a` left by the amount
2722/// specified by the corresponding element in `count` while
2723/// shifting in zeros, and returns the result.
2724///
2725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi64)
2726#[inline]
2727#[target_feature(enable = "avx2")]
2728#[cfg_attr(test, assert_instr(vpsllvq))]
2729#[stable(feature = "simd_x86", since = "1.27.0")]
2730pub unsafe fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
2731 transmute(psllvq(a.as_i64x2(), count.as_i64x2()))
2732}
2733
2734/// Shifts packed 64-bit integers in `a` left by the amount
2735/// specified by the corresponding element in `count` while
2736/// shifting in zeros, and returns the result.
2737///
2738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi64)
2739#[inline]
2740#[target_feature(enable = "avx2")]
2741#[cfg_attr(test, assert_instr(vpsllvq))]
2742#[stable(feature = "simd_x86", since = "1.27.0")]
2743pub unsafe fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
2744 transmute(psllvq256(a.as_i64x4(), count.as_i64x4()))
2745}
2746
2747/// Shifts packed 16-bit integers in `a` right by `count` while
2748/// shifting in sign bits.
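///
/// A minimal sketch, assuming AVX2 is detected at runtime; the shift amount
/// is taken from the low 64 bits of `count`:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // Arithmetic right shift replicates the sign bit: -8 >> 2 == -2.
/// let a = _mm256_set1_epi16(-8);
/// let count = _mm_set_epi64x(0, 2);
/// let r = _mm256_sra_epi16(a, count);
/// let expected = _mm256_set1_epi16(-2);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```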
2749///
2750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi16)
2751#[inline]
2752#[target_feature(enable = "avx2")]
2753#[cfg_attr(test, assert_instr(vpsraw))]
2754#[stable(feature = "simd_x86", since = "1.27.0")]
2755pub unsafe fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
2756 transmute(psraw(a.as_i16x16(), count.as_i16x8()))
2757}
2758
2759/// Shifts packed 32-bit integers in `a` right by `count` while
2760/// shifting in sign bits.
2761///
2762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi32)
2763#[inline]
2764#[target_feature(enable = "avx2")]
2765#[cfg_attr(test, assert_instr(vpsrad))]
2766#[stable(feature = "simd_x86", since = "1.27.0")]
2767pub unsafe fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
2768 transmute(psrad(a.as_i32x8(), count.as_i32x4()))
2769}
2770
2771/// Shifts packed 16-bit integers in `a` right by `IMM8` while
2772/// shifting in sign bits.
2773///
2774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi16)
2775#[inline]
2776#[target_feature(enable = "avx2")]
2777#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 7))]
2778#[rustc_legacy_const_generics(1)]
2779#[stable(feature = "simd_x86", since = "1.27.0")]
2780pub unsafe fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2781 static_assert_uimm_bits!(IMM8, 8);
2782 transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16)))
2783}
2784
2785/// Shifts packed 32-bit integers in `a` right by `IMM8` while
2786/// shifting in sign bits.
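///
/// A minimal sketch of the arithmetic immediate shift, assuming AVX2 is
/// detected at runtime:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // The sign bit fills the vacated positions: -8 >> 2 == -2.
/// let a = _mm256_set1_epi32(-8);
/// let r = _mm256_srai_epi32::<2>(a);
/// let expected = _mm256_set1_epi32(-2);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```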
2787///
2788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi32)
2789#[inline]
2790#[target_feature(enable = "avx2")]
2791#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 7))]
2792#[rustc_legacy_const_generics(1)]
2793#[stable(feature = "simd_x86", since = "1.27.0")]
2794pub unsafe fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2795 static_assert_uimm_bits!(IMM8, 8);
2796 transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31))))
2797}
2798
2799/// Shifts packed 32-bit integers in `a` right by the amount specified by the
2800/// corresponding element in `count` while shifting in sign bits.
2801///
2802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi32)
2803#[inline]
2804#[target_feature(enable = "avx2")]
2805#[cfg_attr(test, assert_instr(vpsravd))]
2806#[stable(feature = "simd_x86", since = "1.27.0")]
2807pub unsafe fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
2808 transmute(psravd(a.as_i32x4(), count.as_i32x4()))
2809}
2810
2811/// Shifts packed 32-bit integers in `a` right by the amount specified by the
2812/// corresponding element in `count` while shifting in sign bits.
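///
/// A small sketch of the per-element arithmetic shift, assuming AVX2 is
/// detected at runtime:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // Each element of `a` is shifted by the matching element of `count`,
/// // with the sign bit shifted in.
/// let a = _mm256_set1_epi32(-16);
/// let count = _mm256_setr_epi32(0, 1, 2, 3, 4, 3, 2, 1);
/// let r = _mm256_srav_epi32(a, count);
/// let expected = _mm256_setr_epi32(-16, -8, -4, -2, -1, -2, -4, -8);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```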
2813///
2814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi32)
2815#[inline]
2816#[target_feature(enable = "avx2")]
2817#[cfg_attr(test, assert_instr(vpsravd))]
2818#[stable(feature = "simd_x86", since = "1.27.0")]
2819pub unsafe fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
2820 transmute(psravd256(a.as_i32x8(), count.as_i32x8()))
2821}
2822
2823/// Shifts 128-bit lanes in `a` right by `IMM8` bytes while shifting in zeros.
2824///
2825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_si256)
2826#[inline]
2827#[target_feature(enable = "avx2")]
2828#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
2829#[rustc_legacy_const_generics(1)]
2830#[stable(feature = "simd_x86", since = "1.27.0")]
2831pub unsafe fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2832 static_assert_uimm_bits!(IMM8, 8);
2833 _mm256_bsrli_epi128::<IMM8>(a)
2834}
2835
2836/// Shifts 128-bit lanes in `a` right by `IMM8` bytes while shifting in zeros.
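///
/// A minimal usage sketch, assuming an AVX2-capable CPU is detected at
/// runtime:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // Shifting each 128-bit lane right by 8 bytes moves the high 64-bit
/// // element of each lane into the low position and zero-fills the rest.
/// let a = _mm256_setr_epi64x(1, 2, 3, 4);
/// let r = _mm256_bsrli_epi128::<8>(a);
/// let expected = _mm256_setr_epi64x(2, 0, 4, 0);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```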
2837///
2838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bsrli_epi128)
2839#[inline]
2840#[target_feature(enable = "avx2")]
2841#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
2842#[rustc_legacy_const_generics(1)]
2843#[stable(feature = "simd_x86", since = "1.27.0")]
2844pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2845 static_assert_uimm_bits!(IMM8, 8);
2846 let a = a.as_i8x32();
2847 let zero = _mm256_setzero_si256().as_i8x32();
2848 let r: i8x32 = match IMM8 {
2849 0 => simd_shuffle!(
2850 a,
2851 zero,
2852 [
2853 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
2854 23, 24, 25, 26, 27, 28, 29, 30, 31,
2855 ],
2856 ),
2857 1 => simd_shuffle!(
2858 a,
2859 zero,
2860 [
2861 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22, 23,
2862 24, 25, 26, 27, 28, 29, 30, 31, 32,
2863 ],
2864 ),
2865 2 => simd_shuffle!(
2866 a,
2867 zero,
2868 [
2869 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 18, 19, 20, 21, 22, 23, 24,
2870 25, 26, 27, 28, 29, 30, 31, 32, 32,
2871 ],
2872 ),
2873 3 => simd_shuffle!(
2874 a,
2875 zero,
2876 [
2877 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 19, 20, 21, 22, 23, 24,
2878 25, 26, 27, 28, 29, 30, 31, 32, 32, 32,
2879 ],
2880 ),
2881 4 => simd_shuffle!(
2882 a,
2883 zero,
2884 [
2885 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 20, 21, 22, 23, 24, 25,
2886 26, 27, 28, 29, 30, 31, 32, 32, 32, 32,
2887 ],
2888 ),
2889 5 => simd_shuffle!(
2890 a,
2891 zero,
2892 [
2893 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 21, 22, 23, 24, 25, 26,
2894 27, 28, 29, 30, 31, 32, 32, 32, 32, 32,
2895 ],
2896 ),
2897 6 => simd_shuffle!(
2898 a,
2899 zero,
2900 [
2901 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 22, 23, 24, 25, 26, 27,
2902 28, 29, 30, 31, 32, 32, 32, 32, 32, 32,
2903 ],
2904 ),
2905 7 => simd_shuffle!(
2906 a,
2907 zero,
2908 [
2909 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 23, 24, 25, 26, 27,
2910 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32,
2911 ],
2912 ),
2913 8 => simd_shuffle!(
2914 a,
2915 zero,
2916 [
2917 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 24, 25, 26, 27, 28,
2918 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32,
2919 ],
2920 ),
2921 9 => simd_shuffle!(
2922 a,
2923 zero,
2924 [
2925 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 25, 26, 27, 28, 29,
2926 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
2927 ],
2928 ),
2929 10 => simd_shuffle!(
2930 a,
2931 zero,
2932 [
2933 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 26, 27, 28, 29, 30,
2934 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
2935 ],
2936 ),
2937 11 => simd_shuffle!(
2938 a,
2939 zero,
2940 [
2941 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 27, 28, 29, 30, 31,
2942 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
2943 ],
2944 ),
2945 12 => simd_shuffle!(
2946 a,
2947 zero,
2948 [
2949 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 28, 29, 30, 31, 32,
2950 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
2951 ],
2952 ),
2953 13 => simd_shuffle!(
2954 a,
2955 zero,
2956 [
2957 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 29, 30, 31, 32, 32,
2958 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
2959 ],
2960 ),
2961 14 => simd_shuffle!(
2962 a,
2963 zero,
2964 [
2965 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 30, 31, 32, 32, 32,
2966 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
2967 ],
2968 ),
2969 15 => simd_shuffle!(
2970 a,
2971 zero,
2972 [
2973 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32,
2974 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
2975 ],
2976 ),
2977 _ => zero,
2978 };
2979 transmute(r)
2980}
2981
2982/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
2983/// zeros.
2984///
2985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi16)
2986#[inline]
2987#[target_feature(enable = "avx2")]
2988#[cfg_attr(test, assert_instr(vpsrlw))]
2989#[stable(feature = "simd_x86", since = "1.27.0")]
2990pub unsafe fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
2991 transmute(psrlw(a.as_i16x16(), count.as_i16x8()))
2992}
2993
2994/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
2995/// zeros.
2996///
2997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi32)
2998#[inline]
2999#[target_feature(enable = "avx2")]
3000#[cfg_attr(test, assert_instr(vpsrld))]
3001#[stable(feature = "simd_x86", since = "1.27.0")]
3002pub unsafe fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
3003 transmute(psrld(a.as_i32x8(), count.as_i32x4()))
3004}
3005
3006/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
3007/// zeros.
3008///
3009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi64)
3010#[inline]
3011#[target_feature(enable = "avx2")]
3012#[cfg_attr(test, assert_instr(vpsrlq))]
3013#[stable(feature = "simd_x86", since = "1.27.0")]
3014pub unsafe fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
3015 transmute(psrlq(a.as_i64x4(), count.as_i64x2()))
3016}
3017
3018/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
3019/// zeros.
3020///
3021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi16)
3022#[inline]
3023#[target_feature(enable = "avx2")]
3024#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 7))]
3025#[rustc_legacy_const_generics(1)]
3026#[stable(feature = "simd_x86", since = "1.27.0")]
3027pub unsafe fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
3028 static_assert_uimm_bits!(IMM8, 8);
3029 if IMM8 >= 16 {
3030 _mm256_setzero_si256()
3031 } else {
3032 transmute(simd_shr(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
3033 }
3034}
3035
3036/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
3037/// zeros.
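///
/// A minimal sketch of the logical immediate shift, assuming AVX2 is
/// detected at runtime:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // Logical right shift fills with zeros: 16 >> 2 == 4.
/// let a = _mm256_set1_epi32(16);
/// let r = _mm256_srli_epi32::<2>(a);
/// let expected = _mm256_set1_epi32(4);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```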
3038///
3039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi32)
3040#[inline]
3041#[target_feature(enable = "avx2")]
3042#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 7))]
3043#[rustc_legacy_const_generics(1)]
3044#[stable(feature = "simd_x86", since = "1.27.0")]
3045pub unsafe fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
3046 static_assert_uimm_bits!(IMM8, 8);
3047 if IMM8 >= 32 {
3048 _mm256_setzero_si256()
3049 } else {
3050 transmute(simd_shr(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
3051 }
3052}
3053
3054/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
3055/// zeros.
3056///
3057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi64)
3058#[inline]
3059#[target_feature(enable = "avx2")]
3060#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 7))]
3061#[rustc_legacy_const_generics(1)]
3062#[stable(feature = "simd_x86", since = "1.27.0")]
3063pub unsafe fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
3064 static_assert_uimm_bits!(IMM8, 8);
3065 if IMM8 >= 64 {
3066 _mm256_setzero_si256()
3067 } else {
3068 transmute(simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
3069 }
3070}
3071
3072/// Shifts packed 32-bit integers in `a` right by the amount specified by
3073/// the corresponding element in `count` while shifting in zeros,
3074///
3075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi32)
3076#[inline]
3077#[target_feature(enable = "avx2")]
3078#[cfg_attr(test, assert_instr(vpsrlvd))]
3079#[stable(feature = "simd_x86", since = "1.27.0")]
3080pub unsafe fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
3081 transmute(psrlvd(a.as_i32x4(), count.as_i32x4()))
3082}
3083
3084/// Shifts packed 32-bit integers in `a` right by the amount specified by
3085/// the corresponding element in `count` while shifting in zeros.
3086///
3087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi32)
3088#[inline]
3089#[target_feature(enable = "avx2")]
3090#[cfg_attr(test, assert_instr(vpsrlvd))]
3091#[stable(feature = "simd_x86", since = "1.27.0")]
3092pub unsafe fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
3093 transmute(psrlvd256(a.as_i32x8(), count.as_i32x8()))
3094}
3095
3096/// Shifts packed 64-bit integers in `a` right by the amount specified by
3097/// the corresponding element in `count` while shifting in zeros.
3098///
3099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi64)
3100#[inline]
3101#[target_feature(enable = "avx2")]
3102#[cfg_attr(test, assert_instr(vpsrlvq))]
3103#[stable(feature = "simd_x86", since = "1.27.0")]
3104pub unsafe fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
3105 transmute(psrlvq(a.as_i64x2(), count.as_i64x2()))
3106}
3107
3108/// Shifts packed 64-bit integers in `a` right by the amount specified by
3109/// the corresponding element in `count` while shifting in zeros.
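///
/// A small sketch of the per-element logical shift, assuming AVX2 is
/// detected at runtime:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // Each element of `a` is shifted right by the matching element of
/// // `count`, with zeros shifted in.
/// let a = _mm256_set1_epi64x(32);
/// let count = _mm256_setr_epi64x(0, 1, 2, 3);
/// let r = _mm256_srlv_epi64(a, count);
/// let expected = _mm256_setr_epi64x(32, 16, 8, 4);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```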
3110///
3111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi64)
3112#[inline]
3113#[target_feature(enable = "avx2")]
3114#[cfg_attr(test, assert_instr(vpsrlvq))]
3115#[stable(feature = "simd_x86", since = "1.27.0")]
3116pub unsafe fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
3117 transmute(psrlvq256(a.as_i64x4(), count.as_i64x4()))
3118}
3119
3120// TODO _mm256_stream_load_si256 (__m256i const* mem_addr)
3121
3122/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.
3123///
3124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi16)
3125#[inline]
3126#[target_feature(enable = "avx2")]
3127#[cfg_attr(test, assert_instr(vpsubw))]
3128#[stable(feature = "simd_x86", since = "1.27.0")]
3129pub unsafe fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
3130 transmute(simd_sub(a.as_i16x16(), b.as_i16x16()))
3131}
3132
3133/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`.
3134///
3135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi32)
3136#[inline]
3137#[target_feature(enable = "avx2")]
3138#[cfg_attr(test, assert_instr(vpsubd))]
3139#[stable(feature = "simd_x86", since = "1.27.0")]
3140pub unsafe fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
3141 transmute(simd_sub(a.as_i32x8(), b.as_i32x8()))
3142}
3143
3144/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`.
3145///
3146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi64)
3147#[inline]
3148#[target_feature(enable = "avx2")]
3149#[cfg_attr(test, assert_instr(vpsubq))]
3150#[stable(feature = "simd_x86", since = "1.27.0")]
3151pub unsafe fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
3152 transmute(simd_sub(a.as_i64x4(), b.as_i64x4()))
3153}
3154
3155/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.
3156///
3157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi8)
3158#[inline]
3159#[target_feature(enable = "avx2")]
3160#[cfg_attr(test, assert_instr(vpsubb))]
3161#[stable(feature = "simd_x86", since = "1.27.0")]
3162pub unsafe fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
3163 transmute(simd_sub(a.as_i8x32(), b.as_i8x32()))
3164}
3165
3166/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in
3167/// `a` using saturation.
3168///
3169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epi16)
3170#[inline]
3171#[target_feature(enable = "avx2")]
3172#[cfg_attr(test, assert_instr(vpsubsw))]
3173#[stable(feature = "simd_x86", since = "1.27.0")]
3174pub unsafe fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
3175 transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16()))
3176}
3177
3178/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in
3179/// `a` using saturation.
3180///
3181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epi8)
3182#[inline]
3183#[target_feature(enable = "avx2")]
3184#[cfg_attr(test, assert_instr(vpsubsb))]
3185#[stable(feature = "simd_x86", since = "1.27.0")]
3186pub unsafe fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
3187 transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32()))
3188}
3189
3190/// Subtracts packed unsigned 16-bit integers in `b` from packed 16-bit
3191/// integers in `a` using saturation.
3192///
3193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epu16)
3194#[inline]
3195#[target_feature(enable = "avx2")]
3196#[cfg_attr(test, assert_instr(vpsubusw))]
3197#[stable(feature = "simd_x86", since = "1.27.0")]
3198pub unsafe fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
3199 transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16()))
3200}
3201
3202/// Subtracts packed unsigned 8-bit integers in `b` from packed 8-bit
3203/// integers in `a` using saturation.
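///
/// A minimal sketch of the saturating behavior, assuming AVX2 is detected
/// at runtime:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // Unsigned saturation clamps at zero instead of wrapping: 1 - 2 == 0.
/// let a = _mm256_set1_epi8(1);
/// let b = _mm256_set1_epi8(2);
/// let r = _mm256_subs_epu8(a, b);
/// let expected = _mm256_set1_epi8(0);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```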
3204///
3205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epu8)
3206#[inline]
3207#[target_feature(enable = "avx2")]
3208#[cfg_attr(test, assert_instr(vpsubusb))]
3209#[stable(feature = "simd_x86", since = "1.27.0")]
3210pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
3211 transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32()))
3212}
3213
3214/// Unpacks and interleaves 8-bit integers from the high half of each
3215/// 128-bit lane in `a` and `b`.
3216///
3217/// ```rust
3218/// #[cfg(target_arch = "x86")]
3219/// use std::arch::x86::*;
3220/// #[cfg(target_arch = "x86_64")]
3221/// use std::arch::x86_64::*;
3222///
3223/// # fn main() {
3224/// # if is_x86_feature_detected!("avx2") {
3225/// # #[target_feature(enable = "avx2")]
3226/// # unsafe fn worker() {
3227/// let a = _mm256_setr_epi8(
3228/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
3229/// 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3230/// );
3231/// let b = _mm256_setr_epi8(
3232/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
3233/// -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
3234/// -30, -31,
3235/// );
3236///
3237/// let c = _mm256_unpackhi_epi8(a, b);
3238///
3239/// let expected = _mm256_setr_epi8(
3240/// 8, -8, 9, -9, 10, -10, 11, -11, 12, -12, 13, -13, 14, -14, 15, -15,
3241/// 24, -24, 25, -25, 26, -26, 27, -27, 28, -28, 29, -29, 30, -30, 31,
3242/// -31,
3243/// );
3244/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3245///
3246/// # }
3247/// # unsafe { worker(); }
3248/// # }
3249/// # }
3250/// ```
3251///
3252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi8)
3253#[inline]
3254#[target_feature(enable = "avx2")]
3255#[cfg_attr(test, assert_instr(vpunpckhbw))]
3256#[stable(feature = "simd_x86", since = "1.27.0")]
3257pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
3258 #[rustfmt::skip]
3259 let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3260 8, 40, 9, 41, 10, 42, 11, 43,
3261 12, 44, 13, 45, 14, 46, 15, 47,
3262 24, 56, 25, 57, 26, 58, 27, 59,
3263 28, 60, 29, 61, 30, 62, 31, 63,
3264 ]);
3265 transmute(r)
3266}
3267
3268/// Unpacks and interleaves 8-bit integers from the low half of each
3269/// 128-bit lane of `a` and `b`.
3270///
3271/// ```rust
3272/// #[cfg(target_arch = "x86")]
3273/// use std::arch::x86::*;
3274/// #[cfg(target_arch = "x86_64")]
3275/// use std::arch::x86_64::*;
3276///
3277/// # fn main() {
3278/// # if is_x86_feature_detected!("avx2") {
3279/// # #[target_feature(enable = "avx2")]
3280/// # unsafe fn worker() {
3281/// let a = _mm256_setr_epi8(
3282/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
3283/// 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3284/// );
3285/// let b = _mm256_setr_epi8(
3286/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
3287/// -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
3288/// -30, -31,
3289/// );
3290///
3291/// let c = _mm256_unpacklo_epi8(a, b);
3292///
3293/// let expected = _mm256_setr_epi8(
3294/// 0, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 16, -16, 17,
3295/// -17, 18, -18, 19, -19, 20, -20, 21, -21, 22, -22, 23, -23,
3296/// );
3297/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3298///
3299/// # }
3300/// # unsafe { worker(); }
3301/// # }
3302/// # }
3303/// ```
3304///
3305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi8)
3306#[inline]
3307#[target_feature(enable = "avx2")]
3308#[cfg_attr(test, assert_instr(vpunpcklbw))]
3309#[stable(feature = "simd_x86", since = "1.27.0")]
3310pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
3311 #[rustfmt::skip]
3312 let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3313 0, 32, 1, 33, 2, 34, 3, 35,
3314 4, 36, 5, 37, 6, 38, 7, 39,
3315 16, 48, 17, 49, 18, 50, 19, 51,
3316 20, 52, 21, 53, 22, 54, 23, 55,
3317 ]);
3318 transmute(r)
3319}
3320
3321/// Unpacks and interleaves 16-bit integers from the high half of each
3322/// 128-bit lane of `a` and `b`.
3323///
3324/// ```rust
3325/// #[cfg(target_arch = "x86")]
3326/// use std::arch::x86::*;
3327/// #[cfg(target_arch = "x86_64")]
3328/// use std::arch::x86_64::*;
3329///
3330/// # fn main() {
3331/// # if is_x86_feature_detected!("avx2") {
3332/// # #[target_feature(enable = "avx2")]
3333/// # unsafe fn worker() {
3334/// let a = _mm256_setr_epi16(
3335/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3336/// );
3337/// let b = _mm256_setr_epi16(
3338/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
3339/// );
3340///
3341/// let c = _mm256_unpackhi_epi16(a, b);
3342///
3343/// let expected = _mm256_setr_epi16(
3344/// 4, -4, 5, -5, 6, -6, 7, -7, 12, -12, 13, -13, 14, -14, 15, -15,
3345/// );
3346/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3347///
3348/// # }
3349/// # unsafe { worker(); }
3350/// # }
3351/// # }
3352/// ```
3353///
3354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi16)
3355#[inline]
3356#[target_feature(enable = "avx2")]
3357#[cfg_attr(test, assert_instr(vpunpckhwd))]
3358#[stable(feature = "simd_x86", since = "1.27.0")]
3359pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
3360 let r: i16x16 = simd_shuffle!(
3361 a.as_i16x16(),
3362 b.as_i16x16(),
3363 [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
3364 );
3365 transmute(r)
3366}
3367
3368/// Unpacks and interleaves 16-bit integers from the low half of each
3369/// 128-bit lane of `a` and `b`.
3370///
3371/// ```rust
3372/// #[cfg(target_arch = "x86")]
3373/// use std::arch::x86::*;
3374/// #[cfg(target_arch = "x86_64")]
3375/// use std::arch::x86_64::*;
3376///
3377/// # fn main() {
3378/// # if is_x86_feature_detected!("avx2") {
3379/// # #[target_feature(enable = "avx2")]
3380/// # unsafe fn worker() {
3381///
3382/// let a = _mm256_setr_epi16(
3383/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3384/// );
3385/// let b = _mm256_setr_epi16(
3386/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
3387/// );
3388///
3389/// let c = _mm256_unpacklo_epi16(a, b);
3390///
3391/// let expected = _mm256_setr_epi16(
3392/// 0, 0, 1, -1, 2, -2, 3, -3, 8, -8, 9, -9, 10, -10, 11, -11,
3393/// );
3394/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3395///
3396/// # }
3397/// # unsafe { worker(); }
3398/// # }
3399/// # }
3400/// ```
3401///
3402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi16)
3403#[inline]
3404#[target_feature(enable = "avx2")]
3405#[cfg_attr(test, assert_instr(vpunpcklwd))]
3406#[stable(feature = "simd_x86", since = "1.27.0")]
3407pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
3408 let r: i16x16 = simd_shuffle!(
3409 a.as_i16x16(),
3410 b.as_i16x16(),
3411 [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
3412 );
3413 transmute(r)
3414}
3415
3416/// Unpacks and interleaves 32-bit integers from the high half of each
3417/// 128-bit lane of `a` and `b`.
3418///
3419/// ```rust
3420/// #[cfg(target_arch = "x86")]
3421/// use std::arch::x86::*;
3422/// #[cfg(target_arch = "x86_64")]
3423/// use std::arch::x86_64::*;
3424///
3425/// # fn main() {
3426/// # if is_x86_feature_detected!("avx2") {
3427/// # #[target_feature(enable = "avx2")]
3428/// # unsafe fn worker() {
3429/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
3430/// let b = _mm256_setr_epi32(0, -1, -2, -3, -4, -5, -6, -7);
3431///
3432/// let c = _mm256_unpackhi_epi32(a, b);
3433///
3434/// let expected = _mm256_setr_epi32(2, -2, 3, -3, 6, -6, 7, -7);
3435/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3436///
3437/// # }
3438/// # unsafe { worker(); }
3439/// # }
3440/// # }
3441/// ```
3442///
3443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi32)
3444#[inline]
3445#[target_feature(enable = "avx2")]
3446#[cfg_attr(test, assert_instr(vunpckhps))]
3447#[stable(feature = "simd_x86", since = "1.27.0")]
3448pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
3449 let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
3450 transmute(r)
3451}
3452
3453/// Unpacks and interleaves 32-bit integers from the low half of each
3454/// 128-bit lane of `a` and `b`.
3455///
3456/// ```rust
3457/// #[cfg(target_arch = "x86")]
3458/// use std::arch::x86::*;
3459/// #[cfg(target_arch = "x86_64")]
3460/// use std::arch::x86_64::*;
3461///
3462/// # fn main() {
3463/// # if is_x86_feature_detected!("avx2") {
3464/// # #[target_feature(enable = "avx2")]
3465/// # unsafe fn worker() {
3466/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
3467/// let b = _mm256_setr_epi32(0, -1, -2, -3, -4, -5, -6, -7);
3468///
3469/// let c = _mm256_unpacklo_epi32(a, b);
3470///
3471/// let expected = _mm256_setr_epi32(0, 0, 1, -1, 4, -4, 5, -5);
3472/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3473///
3474/// # }
3475/// # unsafe { worker(); }
3476/// # }
3477/// # }
3478/// ```
3479///
3480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi32)
3481#[inline]
3482#[target_feature(enable = "avx2")]
3483#[cfg_attr(test, assert_instr(vunpcklps))]
3484#[stable(feature = "simd_x86", since = "1.27.0")]
3485pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
3486 let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
3487 transmute(r)
3488}
3489
3490/// Unpacks and interleaves 64-bit integers from the high half of each
3491/// 128-bit lane of `a` and `b`.
3492///
3493/// ```rust
3494/// #[cfg(target_arch = "x86")]
3495/// use std::arch::x86::*;
3496/// #[cfg(target_arch = "x86_64")]
3497/// use std::arch::x86_64::*;
3498///
3499/// # fn main() {
3500/// # if is_x86_feature_detected!("avx2") {
3501/// # #[target_feature(enable = "avx2")]
3502/// # unsafe fn worker() {
3503/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
3504/// let b = _mm256_setr_epi64x(0, -1, -2, -3);
3505///
3506/// let c = _mm256_unpackhi_epi64(a, b);
3507///
3508/// let expected = _mm256_setr_epi64x(1, -1, 3, -3);
3509/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3510///
3511/// # }
3512/// # unsafe { worker(); }
3513/// # }
3514/// # }
3515/// ```
3516///
3517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi64)
3518#[inline]
3519#[target_feature(enable = "avx2")]
3520#[cfg_attr(test, assert_instr(vunpckhpd))]
3521#[stable(feature = "simd_x86", since = "1.27.0")]
3522pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
3523 let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
3524 transmute(r)
3525}
3526
3527/// Unpacks and interleaves 64-bit integers from the low half of each
3528/// 128-bit lane of `a` and `b`.
3529///
3530/// ```rust
3531/// #[cfg(target_arch = "x86")]
3532/// use std::arch::x86::*;
3533/// #[cfg(target_arch = "x86_64")]
3534/// use std::arch::x86_64::*;
3535///
3536/// # fn main() {
3537/// # if is_x86_feature_detected!("avx2") {
3538/// # #[target_feature(enable = "avx2")]
3539/// # unsafe fn worker() {
3540/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
3541/// let b = _mm256_setr_epi64x(0, -1, -2, -3);
3542///
3543/// let c = _mm256_unpacklo_epi64(a, b);
3544///
3545/// let expected = _mm256_setr_epi64x(0, 0, 2, -2);
3546/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3547///
3548/// # }
3549/// # unsafe { worker(); }
3550/// # }
3551/// # }
3552/// ```
3553///
3554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi64)
3555#[inline]
3556#[target_feature(enable = "avx2")]
3557#[cfg_attr(test, assert_instr(vunpcklpd))]
3558#[stable(feature = "simd_x86", since = "1.27.0")]
3559pub unsafe fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
3560 let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
3561 transmute(r)
3562}
3563
3564/// Computes the bitwise XOR of 256 bits (representing integer data)
3565/// in `a` and `b`.
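///
/// A minimal usage sketch, assuming AVX2 is detected at runtime:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // 0b101 ^ 0b011 == 0b110 in every byte.
/// let a = _mm256_set1_epi8(5);
/// let b = _mm256_set1_epi8(3);
/// let r = _mm256_xor_si256(a, b);
/// let expected = _mm256_set1_epi8(6);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```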
3566///
3567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_si256)
3568#[inline]
3569#[target_feature(enable = "avx2")]
3570#[cfg_attr(test, assert_instr(vxorps))]
3571#[stable(feature = "simd_x86", since = "1.27.0")]
3572pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
3573 transmute(simd_xor(a.as_i64x4(), b.as_i64x4()))
3574}
3575
3576/// Extracts an 8-bit integer from `a`, selected with `INDEX`. Returns a 32-bit
3577/// integer containing the zero-extended integer data.
3578///
3579/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
3580///
3581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi8)
3582#[inline]
3583#[target_feature(enable = "avx2")]
3584// This intrinsic has no corresponding instruction.
3585#[rustc_legacy_const_generics(1)]
3586#[stable(feature = "simd_x86", since = "1.27.0")]
3587pub unsafe fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
3588 static_assert_uimm_bits!(INDEX, 5);
3589 simd_extract::<_, u8>(a.as_u8x32(), INDEX as u32) as i32
3590}
3591
3592/// Extracts a 16-bit integer from `a`, selected with `INDEX`. Returns a 32-bit
3593/// integer containing the zero-extended integer data.
3594///
3595/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
3596///
3597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi16)
3598#[inline]
3599#[target_feature(enable = "avx2")]
3600// This intrinsic has no corresponding instruction.
3601#[rustc_legacy_const_generics(1)]
3602#[stable(feature = "simd_x86", since = "1.27.0")]
3603pub unsafe fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
3604 static_assert_uimm_bits!(INDEX, 4);
3605 simd_extract::<_, u16>(a.as_u16x16(), INDEX as u32) as i32
3606}
3607
3608/// Extracts a 32-bit integer from `a`, selected with `INDEX`.
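///
/// A minimal usage sketch, assuming AVX2 is detected at runtime:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // Pull out the element at index 3.
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// assert_eq!(_mm256_extract_epi32::<3>(a), 3);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```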
3609///
3610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi32)
3611#[inline]
3612#[target_feature(enable = "avx2")]
3613// This intrinsic has no corresponding instruction.
3614#[rustc_legacy_const_generics(1)]
3615#[stable(feature = "simd_x86", since = "1.27.0")]
3616pub unsafe fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 {
3617 static_assert_uimm_bits!(INDEX, 3);
3618 simd_extract(a.as_i32x8(), INDEX as u32)
3619}
3620
3621/// Returns the first element of the input vector of `[4 x double]`.
3622///
3623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsd_f64)
3624#[inline]
3625#[target_feature(enable = "avx2")]
3626//#[cfg_attr(test, assert_instr(movsd))] FIXME
3627#[stable(feature = "simd_x86", since = "1.27.0")]
3628pub unsafe fn _mm256_cvtsd_f64(a: __m256d) -> f64 {
3629 simd_extract(a, 0)
3630}
3631
3632/// Returns the first element of the input vector of `[8 x i32]`.
3633///
3634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsi256_si32)
3635#[inline]
3636#[target_feature(enable = "avx2")]
3637#[stable(feature = "simd_x86", since = "1.27.0")]
3638pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
3639 simd_extract(a.as_i32x8(), 0)
3640}
3641
3642#[allow(improper_ctypes)]
3643extern "C" {
3644 #[link_name = "llvm.x86.avx2.pabs.b"]
3645 fn pabsb(a: i8x32) -> u8x32;
3646 #[link_name = "llvm.x86.avx2.pabs.w"]
3647 fn pabsw(a: i16x16) -> u16x16;
3648 #[link_name = "llvm.x86.avx2.pabs.d"]
3649 fn pabsd(a: i32x8) -> u32x8;
3650 #[link_name = "llvm.x86.avx2.phadd.w"]
3651 fn phaddw(a: i16x16, b: i16x16) -> i16x16;
3652 #[link_name = "llvm.x86.avx2.phadd.d"]
3653 fn phaddd(a: i32x8, b: i32x8) -> i32x8;
3654 #[link_name = "llvm.x86.avx2.phadd.sw"]
3655 fn phaddsw(a: i16x16, b: i16x16) -> i16x16;
3656 #[link_name = "llvm.x86.avx2.phsub.w"]
3657 fn phsubw(a: i16x16, b: i16x16) -> i16x16;
3658 #[link_name = "llvm.x86.avx2.phsub.d"]
3659 fn phsubd(a: i32x8, b: i32x8) -> i32x8;
3660 #[link_name = "llvm.x86.avx2.phsub.sw"]
3661 fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
3662 #[link_name = "llvm.x86.avx2.pmadd.wd"]
3663 fn pmaddwd(a: i16x16, b: i16x16) -> i32x8;
3664 #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
3665 fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16;
3666 #[link_name = "llvm.x86.avx2.maskload.d"]
3667 fn maskloadd(mem_addr: *const i8, mask: i32x4) -> i32x4;
3668 #[link_name = "llvm.x86.avx2.maskload.d.256"]
3669 fn maskloadd256(mem_addr: *const i8, mask: i32x8) -> i32x8;
3670 #[link_name = "llvm.x86.avx2.maskload.q"]
3671 fn maskloadq(mem_addr: *const i8, mask: i64x2) -> i64x2;
3672 #[link_name = "llvm.x86.avx2.maskload.q.256"]
3673 fn maskloadq256(mem_addr: *const i8, mask: i64x4) -> i64x4;
3674 #[link_name = "llvm.x86.avx2.maskstore.d"]
3675 fn maskstored(mem_addr: *mut i8, mask: i32x4, a: i32x4);
3676 #[link_name = "llvm.x86.avx2.maskstore.d.256"]
3677 fn maskstored256(mem_addr: *mut i8, mask: i32x8, a: i32x8);
3678 #[link_name = "llvm.x86.avx2.maskstore.q"]
3679 fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2);
3680 #[link_name = "llvm.x86.avx2.maskstore.q.256"]
3681 fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
3682 #[link_name = "llvm.x86.avx2.mpsadbw"]
3683 fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
3684 #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
3685 fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
3686 #[link_name = "llvm.x86.avx2.packsswb"]
3687 fn packsswb(a: i16x16, b: i16x16) -> i8x32;
3688 #[link_name = "llvm.x86.avx2.packssdw"]
3689 fn packssdw(a: i32x8, b: i32x8) -> i16x16;
3690 #[link_name = "llvm.x86.avx2.packuswb"]
3691 fn packuswb(a: i16x16, b: i16x16) -> u8x32;
3692 #[link_name = "llvm.x86.avx2.packusdw"]
3693 fn packusdw(a: i32x8, b: i32x8) -> u16x16;
3694 #[link_name = "llvm.x86.avx2.psad.bw"]
3695 fn psadbw(a: u8x32, b: u8x32) -> u64x4;
3696 #[link_name = "llvm.x86.avx2.psign.b"]
3697 fn psignb(a: i8x32, b: i8x32) -> i8x32;
3698 #[link_name = "llvm.x86.avx2.psign.w"]
3699 fn psignw(a: i16x16, b: i16x16) -> i16x16;
3700 #[link_name = "llvm.x86.avx2.psign.d"]
3701 fn psignd(a: i32x8, b: i32x8) -> i32x8;
3702 #[link_name = "llvm.x86.avx2.psll.w"]
3703 fn psllw(a: i16x16, count: i16x8) -> i16x16;
3704 #[link_name = "llvm.x86.avx2.psll.d"]
3705 fn pslld(a: i32x8, count: i32x4) -> i32x8;
3706 #[link_name = "llvm.x86.avx2.psll.q"]
3707 fn psllq(a: i64x4, count: i64x2) -> i64x4;
3708 #[link_name = "llvm.x86.avx2.psllv.d"]
3709 fn psllvd(a: i32x4, count: i32x4) -> i32x4;
3710 #[link_name = "llvm.x86.avx2.psllv.d.256"]
3711 fn psllvd256(a: i32x8, count: i32x8) -> i32x8;
3712 #[link_name = "llvm.x86.avx2.psllv.q"]
3713 fn psllvq(a: i64x2, count: i64x2) -> i64x2;
3714 #[link_name = "llvm.x86.avx2.psllv.q.256"]
3715 fn psllvq256(a: i64x4, count: i64x4) -> i64x4;
3716 #[link_name = "llvm.x86.avx2.psra.w"]
3717 fn psraw(a: i16x16, count: i16x8) -> i16x16;
3718 #[link_name = "llvm.x86.avx2.psra.d"]
3719 fn psrad(a: i32x8, count: i32x4) -> i32x8;
3720 #[link_name = "llvm.x86.avx2.psrav.d"]
3721 fn psravd(a: i32x4, count: i32x4) -> i32x4;
3722 #[link_name = "llvm.x86.avx2.psrav.d.256"]
3723 fn psravd256(a: i32x8, count: i32x8) -> i32x8;
3724 #[link_name = "llvm.x86.avx2.psrl.w"]
3725 fn psrlw(a: i16x16, count: i16x8) -> i16x16;
3726 #[link_name = "llvm.x86.avx2.psrl.d"]
3727 fn psrld(a: i32x8, count: i32x4) -> i32x8;
3728 #[link_name = "llvm.x86.avx2.psrl.q"]
3729 fn psrlq(a: i64x4, count: i64x2) -> i64x4;
3730 #[link_name = "llvm.x86.avx2.psrlv.d"]
3731 fn psrlvd(a: i32x4, count: i32x4) -> i32x4;
3732 #[link_name = "llvm.x86.avx2.psrlv.d.256"]
3733 fn psrlvd256(a: i32x8, count: i32x8) -> i32x8;
3734 #[link_name = "llvm.x86.avx2.psrlv.q"]
3735 fn psrlvq(a: i64x2, count: i64x2) -> i64x2;
3736 #[link_name = "llvm.x86.avx2.psrlv.q.256"]
3737 fn psrlvq256(a: i64x4, count: i64x4) -> i64x4;
3738 #[link_name = "llvm.x86.avx2.pshuf.b"]
3739 fn pshufb(a: u8x32, b: u8x32) -> u8x32;
3740 #[link_name = "llvm.x86.avx2.permd"]
3741 fn permd(a: u32x8, b: u32x8) -> u32x8;
3742 #[link_name = "llvm.x86.avx2.permps"]
3743 fn permps(a: __m256, b: i32x8) -> __m256;
3744 #[link_name = "llvm.x86.avx2.vperm2i128"]
3745 fn vperm2i128(a: i64x4, b: i64x4, imm8: i8) -> i64x4;
3746 #[link_name = "llvm.x86.avx2.gather.d.d"]
3747 fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4;
3748 #[link_name = "llvm.x86.avx2.gather.d.d.256"]
3749 fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8;
3750 #[link_name = "llvm.x86.avx2.gather.d.q"]
3751 fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2;
3752 #[link_name = "llvm.x86.avx2.gather.d.q.256"]
3753 fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4;
3754 #[link_name = "llvm.x86.avx2.gather.q.d"]
3755 fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4;
3756 #[link_name = "llvm.x86.avx2.gather.q.d.256"]
3757 fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4;
3758 #[link_name = "llvm.x86.avx2.gather.q.q"]
3759 fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2;
3760 #[link_name = "llvm.x86.avx2.gather.q.q.256"]
3761 fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4;
3762 #[link_name = "llvm.x86.avx2.gather.d.pd"]
3763 fn pgatherdpd(
3764 src: __m128d,
3765 slice: *const i8,
3766 offsets: i32x4,
3767 mask: __m128d,
3768 scale: i8,
3769 ) -> __m128d;
3770 #[link_name = "llvm.x86.avx2.gather.d.pd.256"]
3771 fn vpgatherdpd(
3772 src: __m256d,
3773 slice: *const i8,
3774 offsets: i32x4,
3775 mask: __m256d,
3776 scale: i8,
3777 ) -> __m256d;
3778 #[link_name = "llvm.x86.avx2.gather.q.pd"]
3779 fn pgatherqpd(
3780 src: __m128d,
3781 slice: *const i8,
3782 offsets: i64x2,
3783 mask: __m128d,
3784 scale: i8,
3785 ) -> __m128d;
3786 #[link_name = "llvm.x86.avx2.gather.q.pd.256"]
3787 fn vpgatherqpd(
3788 src: __m256d,
3789 slice: *const i8,
3790 offsets: i64x4,
3791 mask: __m256d,
3792 scale: i8,
3793 ) -> __m256d;
3794 #[link_name = "llvm.x86.avx2.gather.d.ps"]
3795 fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8)
3796 -> __m128;
3797 #[link_name = "llvm.x86.avx2.gather.d.ps.256"]
3798 fn vpgatherdps(
3799 src: __m256,
3800 slice: *const i8,
3801 offsets: i32x8,
3802 mask: __m256,
3803 scale: i8,
3804 ) -> __m256;
3805 #[link_name = "llvm.x86.avx2.gather.q.ps"]
3806 fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8)
3807 -> __m128;
3808 #[link_name = "llvm.x86.avx2.gather.q.ps.256"]
3809 fn vpgatherqps(
3810 src: __m128,
3811 slice: *const i8,
3812 offsets: i64x4,
3813 mask: __m128,
3814 scale: i8,
3815 ) -> __m128;
3816 #[link_name = "llvm.x86.avx2.psll.dq"]
3817 fn vpslldq(a: i64x4, b: i32) -> i64x4;
3818 #[link_name = "llvm.x86.avx2.psrl.dq"]
3819 fn vpsrldq(a: i64x4, b: i32) -> i64x4;
3820}
3821
3822#[cfg(test)]
3823mod tests {
3824
3825 use stdarch_test::simd_test;
3826
3827 use crate::core_arch::x86::*;
3828
3829 #[simd_test(enable = "avx2")]
3830 unsafe fn test_mm256_abs_epi32() {
3831 #[rustfmt::skip]
3832 let a = _mm256_setr_epi32(
3833 0, 1, -1, i32::MAX,
3834 i32::MIN, 100, -100, -32,
3835 );
3836 let r = _mm256_abs_epi32(a);
3837 #[rustfmt::skip]
3838 let e = _mm256_setr_epi32(
3839 0, 1, 1, i32::MAX,
3840 i32::MAX.wrapping_add(1), 100, 100, 32,
3841 );
3842 assert_eq_m256i(r, e);
3843 }
3844
3845 #[simd_test(enable = "avx2")]
3846 unsafe fn test_mm256_abs_epi16() {
3847 #[rustfmt::skip]
3848 let a = _mm256_setr_epi16(
3849 0, 1, -1, 2, -2, 3, -3, 4,
3850 -4, 5, -5, i16::MAX, i16::MIN, 100, -100, -32,
3851 );
3852 let r = _mm256_abs_epi16(a);
3853 #[rustfmt::skip]
3854 let e = _mm256_setr_epi16(
3855 0, 1, 1, 2, 2, 3, 3, 4,
3856 4, 5, 5, i16::MAX, i16::MAX.wrapping_add(1), 100, 100, 32,
3857 );
3858 assert_eq_m256i(r, e);
3859 }
3860
3861 #[simd_test(enable = "avx2")]
3862 unsafe fn test_mm256_abs_epi8() {
3863 #[rustfmt::skip]
3864 let a = _mm256_setr_epi8(
3865 0, 1, -1, 2, -2, 3, -3, 4,
3866 -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
3867 0, 1, -1, 2, -2, 3, -3, 4,
3868 -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
3869 );
3870 let r = _mm256_abs_epi8(a);
3871 #[rustfmt::skip]
3872 let e = _mm256_setr_epi8(
3873 0, 1, 1, 2, 2, 3, 3, 4,
3874 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
3875 0, 1, 1, 2, 2, 3, 3, 4,
3876 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
3877 );
3878 assert_eq_m256i(r, e);
3879 }
3880
3881 #[simd_test(enable = "avx2")]
3882 unsafe fn test_mm256_add_epi64() {
3883 let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000);
3884 let b = _mm256_setr_epi64x(-1, 0, 1, 2);
3885 let r = _mm256_add_epi64(a, b);
3886 let e = _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002);
3887 assert_eq_m256i(r, e);
3888 }
3889
3890 #[simd_test(enable = "avx2")]
3891 unsafe fn test_mm256_add_epi32() {
3892 let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6);
3893 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3894 let r = _mm256_add_epi32(a, b);
3895 let e = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
3896 assert_eq_m256i(r, e);
3897 }
3898
3899 #[simd_test(enable = "avx2")]
3900 unsafe fn test_mm256_add_epi16() {
3901 #[rustfmt::skip]
3902 let a = _mm256_setr_epi16(
3903 0, 1, 2, 3, 4, 5, 6, 7,
3904 8, 9, 10, 11, 12, 13, 14, 15,
3905 );
3906 #[rustfmt::skip]
3907 let b = _mm256_setr_epi16(
3908 0, 1, 2, 3, 4, 5, 6, 7,
3909 8, 9, 10, 11, 12, 13, 14, 15,
3910 );
3911 let r = _mm256_add_epi16(a, b);
3912 #[rustfmt::skip]
3913 let e = _mm256_setr_epi16(
3914 0, 2, 4, 6, 8, 10, 12, 14,
3915 16, 18, 20, 22, 24, 26, 28, 30,
3916 );
3917 assert_eq_m256i(r, e);
3918 }
3919
3920 #[simd_test(enable = "avx2")]
3921 unsafe fn test_mm256_add_epi8() {
3922 #[rustfmt::skip]
3923 let a = _mm256_setr_epi8(
3924 0, 1, 2, 3, 4, 5, 6, 7,
3925 8, 9, 10, 11, 12, 13, 14, 15,
3926 16, 17, 18, 19, 20, 21, 22, 23,
3927 24, 25, 26, 27, 28, 29, 30, 31,
3928 );
3929 #[rustfmt::skip]
3930 let b = _mm256_setr_epi8(
3931 0, 1, 2, 3, 4, 5, 6, 7,
3932 8, 9, 10, 11, 12, 13, 14, 15,
3933 16, 17, 18, 19, 20, 21, 22, 23,
3934 24, 25, 26, 27, 28, 29, 30, 31,
3935 );
3936 let r = _mm256_add_epi8(a, b);
3937 #[rustfmt::skip]
3938 let e = _mm256_setr_epi8(
3939 0, 2, 4, 6, 8, 10, 12, 14,
3940 16, 18, 20, 22, 24, 26, 28, 30,
3941 32, 34, 36, 38, 40, 42, 44, 46,
3942 48, 50, 52, 54, 56, 58, 60, 62,
3943 );
3944 assert_eq_m256i(r, e);
3945 }
3946
3947 #[simd_test(enable = "avx2")]
3948 unsafe fn test_mm256_adds_epi8() {
3949 #[rustfmt::skip]
3950 let a = _mm256_setr_epi8(
3951 0, 1, 2, 3, 4, 5, 6, 7,
3952 8, 9, 10, 11, 12, 13, 14, 15,
3953 16, 17, 18, 19, 20, 21, 22, 23,
3954 24, 25, 26, 27, 28, 29, 30, 31,
3955 );
3956 #[rustfmt::skip]
3957 let b = _mm256_setr_epi8(
3958 32, 33, 34, 35, 36, 37, 38, 39,
3959 40, 41, 42, 43, 44, 45, 46, 47,
3960 48, 49, 50, 51, 52, 53, 54, 55,
3961 56, 57, 58, 59, 60, 61, 62, 63,
3962 );
3963 let r = _mm256_adds_epi8(a, b);
3964 #[rustfmt::skip]
3965 let e = _mm256_setr_epi8(
3966 32, 34, 36, 38, 40, 42, 44, 46,
3967 48, 50, 52, 54, 56, 58, 60, 62,
3968 64, 66, 68, 70, 72, 74, 76, 78,
3969 80, 82, 84, 86, 88, 90, 92, 94,
3970 );
3971 assert_eq_m256i(r, e);
3972 }
3973
3974 #[simd_test(enable = "avx2")]
3975 unsafe fn test_mm256_adds_epi8_saturate_positive() {
3976 let a = _mm256_set1_epi8(0x7F);
3977 let b = _mm256_set1_epi8(1);
3978 let r = _mm256_adds_epi8(a, b);
3979 assert_eq_m256i(r, a);
3980 }
3981
3982 #[simd_test(enable = "avx2")]
3983 unsafe fn test_mm256_adds_epi8_saturate_negative() {
3984 let a = _mm256_set1_epi8(-0x80);
3985 let b = _mm256_set1_epi8(-1);
3986 let r = _mm256_adds_epi8(a, b);
3987 assert_eq_m256i(r, a);
3988 }
3989
3990 #[simd_test(enable = "avx2")]
3991 unsafe fn test_mm256_adds_epi16() {
3992 #[rustfmt::skip]
3993 let a = _mm256_setr_epi16(
3994 0, 1, 2, 3, 4, 5, 6, 7,
3995 8, 9, 10, 11, 12, 13, 14, 15,
3996 );
3997 #[rustfmt::skip]
3998 let b = _mm256_setr_epi16(
3999 32, 33, 34, 35, 36, 37, 38, 39,
4000 40, 41, 42, 43, 44, 45, 46, 47,
4001 );
4002 let r = _mm256_adds_epi16(a, b);
4003 #[rustfmt::skip]
4004 let e = _mm256_setr_epi16(
4005 32, 34, 36, 38, 40, 42, 44, 46,
4006 48, 50, 52, 54, 56, 58, 60, 62,
4007 );
4008
4009 assert_eq_m256i(r, e);
4010 }
4011
4012 #[simd_test(enable = "avx2")]
4013 unsafe fn test_mm256_adds_epi16_saturate_positive() {
4014 let a = _mm256_set1_epi16(0x7FFF);
4015 let b = _mm256_set1_epi16(1);
4016 let r = _mm256_adds_epi16(a, b);
4017 assert_eq_m256i(r, a);
4018 }
4019
4020 #[simd_test(enable = "avx2")]
4021 unsafe fn test_mm256_adds_epi16_saturate_negative() {
4022 let a = _mm256_set1_epi16(-0x8000);
4023 let b = _mm256_set1_epi16(-1);
4024 let r = _mm256_adds_epi16(a, b);
4025 assert_eq_m256i(r, a);
4026 }
4027
4028 #[simd_test(enable = "avx2")]
4029 unsafe fn test_mm256_adds_epu8() {
4030 #[rustfmt::skip]
4031 let a = _mm256_setr_epi8(
4032 0, 1, 2, 3, 4, 5, 6, 7,
4033 8, 9, 10, 11, 12, 13, 14, 15,
4034 16, 17, 18, 19, 20, 21, 22, 23,
4035 24, 25, 26, 27, 28, 29, 30, 31,
4036 );
4037 #[rustfmt::skip]
4038 let b = _mm256_setr_epi8(
4039 32, 33, 34, 35, 36, 37, 38, 39,
4040 40, 41, 42, 43, 44, 45, 46, 47,
4041 48, 49, 50, 51, 52, 53, 54, 55,
4042 56, 57, 58, 59, 60, 61, 62, 63,
4043 );
4044 let r = _mm256_adds_epu8(a, b);
4045 #[rustfmt::skip]
4046 let e = _mm256_setr_epi8(
4047 32, 34, 36, 38, 40, 42, 44, 46,
4048 48, 50, 52, 54, 56, 58, 60, 62,
4049 64, 66, 68, 70, 72, 74, 76, 78,
4050 80, 82, 84, 86, 88, 90, 92, 94,
4051 );
4052 assert_eq_m256i(r, e);
4053 }
4054
4055 #[simd_test(enable = "avx2")]
4056 unsafe fn test_mm256_adds_epu8_saturate() {
4057 let a = _mm256_set1_epi8(!0);
4058 let b = _mm256_set1_epi8(1);
4059 let r = _mm256_adds_epu8(a, b);
4060 assert_eq_m256i(r, a);
4061 }
4062
4063 #[simd_test(enable = "avx2")]
4064 unsafe fn test_mm256_adds_epu16() {
4065 #[rustfmt::skip]
4066 let a = _mm256_setr_epi16(
4067 0, 1, 2, 3, 4, 5, 6, 7,
4068 8, 9, 10, 11, 12, 13, 14, 15,
4069 );
4070 #[rustfmt::skip]
4071 let b = _mm256_setr_epi16(
4072 32, 33, 34, 35, 36, 37, 38, 39,
4073 40, 41, 42, 43, 44, 45, 46, 47,
4074 );
4075 let r = _mm256_adds_epu16(a, b);
4076 #[rustfmt::skip]
4077 let e = _mm256_setr_epi16(
4078 32, 34, 36, 38, 40, 42, 44, 46,
4079 48, 50, 52, 54, 56, 58, 60, 62,
4080 );
4081
4082 assert_eq_m256i(r, e);
4083 }
4084
4085 #[simd_test(enable = "avx2")]
4086 unsafe fn test_mm256_adds_epu16_saturate() {
4087 let a = _mm256_set1_epi16(!0);
4088 let b = _mm256_set1_epi16(1);
4089 let r = _mm256_adds_epu16(a, b);
4090 assert_eq_m256i(r, a);
4091 }
4092
4093 #[simd_test(enable = "avx2")]
4094 unsafe fn test_mm256_and_si256() {
4095 let a = _mm256_set1_epi8(5);
4096 let b = _mm256_set1_epi8(3);
4097 let got = _mm256_and_si256(a, b);
4098 assert_eq_m256i(got, _mm256_set1_epi8(1));
4099 }
4100
4101 #[simd_test(enable = "avx2")]
4102 unsafe fn test_mm256_andnot_si256() {
4103 let a = _mm256_set1_epi8(5);
4104 let b = _mm256_set1_epi8(3);
4105 let got = _mm256_andnot_si256(a, b);
4106 assert_eq_m256i(got, _mm256_set1_epi8(2));
4107 }
4108
4109 #[simd_test(enable = "avx2")]
4110 unsafe fn test_mm256_avg_epu8() {
4111 let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9));
4112 let r = _mm256_avg_epu8(a, b);
4113 assert_eq_m256i(r, _mm256_set1_epi8(6));
4114 }
4115
4116 #[simd_test(enable = "avx2")]
4117 unsafe fn test_mm256_avg_epu16() {
4118 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4119 let r = _mm256_avg_epu16(a, b);
4120 assert_eq_m256i(r, _mm256_set1_epi16(6));
4121 }
4122
4123 #[simd_test(enable = "avx2")]
4124 unsafe fn test_mm_blend_epi32() {
4125 let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
4126 let e = _mm_setr_epi32(9, 3, 3, 3);
4127 let r = _mm_blend_epi32::<0x01>(a, b);
4128 assert_eq_m128i(r, e);
4129
4130 let r = _mm_blend_epi32::<0x0E>(b, a);
4131 assert_eq_m128i(r, e);
4132 }
4133
4134 #[simd_test(enable = "avx2")]
4135 unsafe fn test_mm256_blend_epi32() {
4136 let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
4137 let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
4138 let r = _mm256_blend_epi32::<0x01>(a, b);
4139 assert_eq_m256i(r, e);
4140
4141 let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9);
4142 let r = _mm256_blend_epi32::<0x82>(a, b);
4143 assert_eq_m256i(r, e);
4144
4145 let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3);
4146 let r = _mm256_blend_epi32::<0x7C>(a, b);
4147 assert_eq_m256i(r, e);
4148 }
4149
4150 #[simd_test(enable = "avx2")]
4151 unsafe fn test_mm256_blend_epi16() {
4152 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4153 let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
4154 let r = _mm256_blend_epi16::<0x01>(a, b);
4155 assert_eq_m256i(r, e);
4156
4157 let r = _mm256_blend_epi16::<0xFE>(b, a);
4158 assert_eq_m256i(r, e);
4159 }
4160
4161 #[simd_test(enable = "avx2")]
4162 unsafe fn test_mm256_blendv_epi8() {
4163 let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2));
4164 let mask = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1);
4165 let e = _mm256_insert_epi8::<2>(_mm256_set1_epi8(4), 2);
4166 let r = _mm256_blendv_epi8(a, b, mask);
4167 assert_eq_m256i(r, e);
4168 }
4169
4170 #[simd_test(enable = "avx2")]
4171 unsafe fn test_mm_broadcastb_epi8() {
4172 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4173 let res = _mm_broadcastb_epi8(a);
4174 assert_eq_m128i(res, _mm_set1_epi8(0x2a));
4175 }
4176
4177 #[simd_test(enable = "avx2")]
4178 unsafe fn test_mm256_broadcastb_epi8() {
4179 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4180 let res = _mm256_broadcastb_epi8(a);
4181 assert_eq_m256i(res, _mm256_set1_epi8(0x2a));
4182 }
4183
4184 #[simd_test(enable = "avx2")]
4185 unsafe fn test_mm_broadcastd_epi32() {
4186 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4187 let res = _mm_broadcastd_epi32(a);
4188 assert_eq_m128i(res, _mm_set1_epi32(0x2a));
4189 }
4190
4191 #[simd_test(enable = "avx2")]
4192 unsafe fn test_mm256_broadcastd_epi32() {
4193 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4194 let res = _mm256_broadcastd_epi32(a);
4195 assert_eq_m256i(res, _mm256_set1_epi32(0x2a));
4196 }
4197
4198 #[simd_test(enable = "avx2")]
4199 unsafe fn test_mm_broadcastq_epi64() {
4200 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4201 let res = _mm_broadcastq_epi64(a);
4202 assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff));
4203 }
4204
4205 #[simd_test(enable = "avx2")]
4206 unsafe fn test_mm256_broadcastq_epi64() {
4207 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4208 let res = _mm256_broadcastq_epi64(a);
4209 assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff));
4210 }
4211
4212 #[simd_test(enable = "avx2")]
4213 unsafe fn test_mm_broadcastsd_pd() {
4214 let a = _mm_setr_pd(6.28, 3.14);
4215 let res = _mm_broadcastsd_pd(a);
4216 assert_eq_m128d(res, _mm_set1_pd(6.28f64));
4217 }
4218
4219 #[simd_test(enable = "avx2")]
4220 unsafe fn test_mm256_broadcastsd_pd() {
4221 let a = _mm_setr_pd(6.28, 3.14);
4222 let res = _mm256_broadcastsd_pd(a);
4223 assert_eq_m256d(res, _mm256_set1_pd(6.28f64));
4224 }
4225
4226 #[simd_test(enable = "avx2")]
4227 unsafe fn test_mm256_broadcastsi128_si256() {
4228 let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
4229 let res = _mm256_broadcastsi128_si256(a);
4230 let retval = _mm256_setr_epi64x(
4231 0x0987654321012334,
4232 0x5678909876543210,
4233 0x0987654321012334,
4234 0x5678909876543210,
4235 );
4236 assert_eq_m256i(res, retval);
4237 }
4238
4239 #[simd_test(enable = "avx2")]
4240 unsafe fn test_mm_broadcastss_ps() {
4241 let a = _mm_setr_ps(6.28, 3.14, 0.0, 0.0);
4242 let res = _mm_broadcastss_ps(a);
4243 assert_eq_m128(res, _mm_set1_ps(6.28f32));
4244 }
4245
4246 #[simd_test(enable = "avx2")]
4247 unsafe fn test_mm256_broadcastss_ps() {
4248 let a = _mm_setr_ps(6.28, 3.14, 0.0, 0.0);
4249 let res = _mm256_broadcastss_ps(a);
4250 assert_eq_m256(res, _mm256_set1_ps(6.28f32));
4251 }
4252
4253 #[simd_test(enable = "avx2")]
4254 unsafe fn test_mm_broadcastw_epi16() {
4255 let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4256 let res = _mm_broadcastw_epi16(a);
4257 assert_eq_m128i(res, _mm_set1_epi16(0x22b));
4258 }
4259
4260 #[simd_test(enable = "avx2")]
4261 unsafe fn test_mm256_broadcastw_epi16() {
4262 let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4263 let res = _mm256_broadcastw_epi16(a);
4264 assert_eq_m256i(res, _mm256_set1_epi16(0x22b));
4265 }
4266
4267 #[simd_test(enable = "avx2")]
4268 unsafe fn test_mm256_cmpeq_epi8() {
4269 #[rustfmt::skip]
4270 let a = _mm256_setr_epi8(
4271 0, 1, 2, 3, 4, 5, 6, 7,
4272 8, 9, 10, 11, 12, 13, 14, 15,
4273 16, 17, 18, 19, 20, 21, 22, 23,
4274 24, 25, 26, 27, 28, 29, 30, 31,
4275 );
4276 #[rustfmt::skip]
4277 let b = _mm256_setr_epi8(
4278 31, 30, 2, 28, 27, 26, 25, 24,
4279 23, 22, 21, 20, 19, 18, 17, 16,
4280 15, 14, 13, 12, 11, 10, 9, 8,
4281 7, 6, 5, 4, 3, 2, 1, 0,
4282 );
4283 let r = _mm256_cmpeq_epi8(a, b);
4284 assert_eq_m256i(r, _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), !0));
4285 }
4286
4287 #[simd_test(enable = "avx2")]
4288 unsafe fn test_mm256_cmpeq_epi16() {
4289 #[rustfmt::skip]
4290 let a = _mm256_setr_epi16(
4291 0, 1, 2, 3, 4, 5, 6, 7,
4292 8, 9, 10, 11, 12, 13, 14, 15,
4293 );
4294 #[rustfmt::skip]
4295 let b = _mm256_setr_epi16(
4296 15, 14, 2, 12, 11, 10, 9, 8,
4297 7, 6, 5, 4, 3, 2, 1, 0,
4298 );
4299 let r = _mm256_cmpeq_epi16(a, b);
4300 assert_eq_m256i(r, _mm256_insert_epi16::<2>(_mm256_set1_epi16(0), !0));
4301 }
4302
4303 #[simd_test(enable = "avx2")]
4304 unsafe fn test_mm256_cmpeq_epi32() {
4305 let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4306 let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0);
4307 let r = _mm256_cmpeq_epi32(a, b);
4308 let e = _mm256_set1_epi32(0);
4309 let e = _mm256_insert_epi32::<2>(e, !0);
4310 assert_eq_m256i(r, e);
4311 }
4312
4313 #[simd_test(enable = "avx2")]
4314 unsafe fn test_mm256_cmpeq_epi64() {
4315 let a = _mm256_setr_epi64x(0, 1, 2, 3);
4316 let b = _mm256_setr_epi64x(3, 2, 2, 0);
4317 let r = _mm256_cmpeq_epi64(a, b);
4318 assert_eq_m256i(r, _mm256_insert_epi64::<2>(_mm256_set1_epi64x(0), !0));
4319 }
4320
4321 #[simd_test(enable = "avx2")]
4322 unsafe fn test_mm256_cmpgt_epi8() {
4323 let a = _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), 5);
4324 let b = _mm256_set1_epi8(0);
4325 let r = _mm256_cmpgt_epi8(a, b);
4326 assert_eq_m256i(r, _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), !0));
4327 }
4328
4329 #[simd_test(enable = "avx2")]
4330 unsafe fn test_mm256_cmpgt_epi16() {
4331 let a = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 5);
4332 let b = _mm256_set1_epi16(0);
4333 let r = _mm256_cmpgt_epi16(a, b);
4334 assert_eq_m256i(r, _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), !0));
4335 }
4336
4337 #[simd_test(enable = "avx2")]
4338 unsafe fn test_mm256_cmpgt_epi32() {
4339 let a = _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), 5);
4340 let b = _mm256_set1_epi32(0);
4341 let r = _mm256_cmpgt_epi32(a, b);
4342 assert_eq_m256i(r, _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), !0));
4343 }
4344
4345 #[simd_test(enable = "avx2")]
4346 unsafe fn test_mm256_cmpgt_epi64() {
4347 let a = _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), 5);
4348 let b = _mm256_set1_epi64x(0);
4349 let r = _mm256_cmpgt_epi64(a, b);
4350 assert_eq_m256i(r, _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), !0));
4351 }
4352
4353 #[simd_test(enable = "avx2")]
4354 unsafe fn test_mm256_cvtepi8_epi16() {
4355 #[rustfmt::skip]
4356 let a = _mm_setr_epi8(
4357 0, 0, -1, 1, -2, 2, -3, 3,
4358 -4, 4, -5, 5, -6, 6, -7, 7,
4359 );
4360 #[rustfmt::skip]
4361 let r = _mm256_setr_epi16(
4362 0, 0, -1, 1, -2, 2, -3, 3,
4363 -4, 4, -5, 5, -6, 6, -7, 7,
4364 );
4365 assert_eq_m256i(r, _mm256_cvtepi8_epi16(a));
4366 }
4367
4368 #[simd_test(enable = "avx2")]
4369 unsafe fn test_mm256_cvtepi8_epi32() {
4370 #[rustfmt::skip]
4371 let a = _mm_setr_epi8(
4372 0, 0, -1, 1, -2, 2, -3, 3,
4373 -4, 4, -5, 5, -6, 6, -7, 7,
4374 );
4375 let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4376 assert_eq_m256i(r, _mm256_cvtepi8_epi32(a));
4377 }
4378
4379 #[simd_test(enable = "avx2")]
4380 unsafe fn test_mm256_cvtepi8_epi64() {
4381 #[rustfmt::skip]
4382 let a = _mm_setr_epi8(
4383 0, 0, -1, 1, -2, 2, -3, 3,
4384 -4, 4, -5, 5, -6, 6, -7, 7,
4385 );
4386 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4387 assert_eq_m256i(r, _mm256_cvtepi8_epi64(a));
4388 }
4389
4390 #[simd_test(enable = "avx2")]
4391 unsafe fn test_mm256_cvtepi16_epi32() {
4392 let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4393 let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4394 assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
4395 }
4396
4397 #[simd_test(enable = "avx2")]
4398 unsafe fn test_mm256_cvtepi16_epi64() {
4399 let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4400 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4401 assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
4402 }
4403
4404 #[simd_test(enable = "avx2")]
4405 unsafe fn test_mm256_cvtepi32_epi64() {
4406 let a = _mm_setr_epi32(0, 0, -1, 1);
4407 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4408 assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
4409 }
4410
4411 #[simd_test(enable = "avx2")]
4412 unsafe fn test_mm256_cvtepu16_epi32() {
4413 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4414 let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4415 assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
4416 }
4417
4418 #[simd_test(enable = "avx2")]
4419 unsafe fn test_mm256_cvtepu16_epi64() {
4420 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4421 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4422 assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
4423 }
4424
4425 #[simd_test(enable = "avx2")]
4426 unsafe fn test_mm256_cvtepu32_epi64() {
4427 let a = _mm_setr_epi32(0, 1, 2, 3);
4428 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4429 assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
4430 }
4431
4432 #[simd_test(enable = "avx2")]
4433 unsafe fn test_mm256_cvtepu8_epi16() {
4434 #[rustfmt::skip]
4435 let a = _mm_setr_epi8(
4436 0, 1, 2, 3, 4, 5, 6, 7,
4437 8, 9, 10, 11, 12, 13, 14, 15,
4438 );
4439 #[rustfmt::skip]
4440 let r = _mm256_setr_epi16(
4441 0, 1, 2, 3, 4, 5, 6, 7,
4442 8, 9, 10, 11, 12, 13, 14, 15,
4443 );
4444 assert_eq_m256i(r, _mm256_cvtepu8_epi16(a));
4445 }
4446
4447 #[simd_test(enable = "avx2")]
4448 unsafe fn test_mm256_cvtepu8_epi32() {
4449 #[rustfmt::skip]
4450 let a = _mm_setr_epi8(
4451 0, 1, 2, 3, 4, 5, 6, 7,
4452 8, 9, 10, 11, 12, 13, 14, 15,
4453 );
4454 let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4455 assert_eq_m256i(r, _mm256_cvtepu8_epi32(a));
4456 }
4457
4458 #[simd_test(enable = "avx2")]
4459 unsafe fn test_mm256_cvtepu8_epi64() {
4460 #[rustfmt::skip]
4461 let a = _mm_setr_epi8(
4462 0, 1, 2, 3, 4, 5, 6, 7,
4463 8, 9, 10, 11, 12, 13, 14, 15,
4464 );
4465 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4466 assert_eq_m256i(r, _mm256_cvtepu8_epi64(a));
4467 }
4468
4469 #[simd_test(enable = "avx2")]
4470 unsafe fn test_mm256_extracti128_si256() {
4471 let a = _mm256_setr_epi64x(1, 2, 3, 4);
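        // IMM8 == 1 selects the upper 128-bit half of `a`, i.e. elements 3 and 4.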
4472 let r = _mm256_extracti128_si256::<1>(a);
4473 let e = _mm_setr_epi64x(3, 4);
4474 assert_eq_m128i(r, e);
4475 }
4476
4477 #[simd_test(enable = "avx2")]
4478 unsafe fn test_mm256_hadd_epi16() {
4479 let a = _mm256_set1_epi16(2);
4480 let b = _mm256_set1_epi16(4);
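        // Horizontal add works within each 128-bit lane: four pairwise sums
        // from `a` (2 + 2) followed by four pairwise sums from `b` (4 + 4).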
4481 let r = _mm256_hadd_epi16(a, b);
4482 let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
4483 assert_eq_m256i(r, e);
4484 }
4485
4486 #[simd_test(enable = "avx2")]
4487 unsafe fn test_mm256_hadd_epi32() {
4488 let a = _mm256_set1_epi32(2);
4489 let b = _mm256_set1_epi32(4);
4490 let r = _mm256_hadd_epi32(a, b);
4491 let e = _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8);
4492 assert_eq_m256i(r, e);
4493 }
4494
4495 #[simd_test(enable = "avx2")]
4496 unsafe fn test_mm256_hadds_epi16() {
4497 let a = _mm256_set1_epi16(2);
4498 let a = _mm256_insert_epi16::<0>(a, 0x7fff);
4499 let a = _mm256_insert_epi16::<1>(a, 1);
4500 let b = _mm256_set1_epi16(4);
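        // The first pair, 0x7FFF + 1, would overflow i16, so it saturates to 0x7FFF.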
4501 let r = _mm256_hadds_epi16(a, b);
4502 #[rustfmt::skip]
4503 let e = _mm256_setr_epi16(
4504 0x7FFF, 4, 4, 4, 8, 8, 8, 8,
4505 4, 4, 4, 4, 8, 8, 8, 8,
4506 );
4507 assert_eq_m256i(r, e);
4508 }
4509
4510 #[simd_test(enable = "avx2")]
4511 unsafe fn test_mm256_hsub_epi16() {
4512 let a = _mm256_set1_epi16(2);
4513 let b = _mm256_set1_epi16(4);
4514 let r = _mm256_hsub_epi16(a, b);
4515 let e = _mm256_set1_epi16(0);
4516 assert_eq_m256i(r, e);
4517 }
4518
4519 #[simd_test(enable = "avx2")]
4520 unsafe fn test_mm256_hsub_epi32() {
4521 let a = _mm256_set1_epi32(2);
4522 let b = _mm256_set1_epi32(4);
4523 let r = _mm256_hsub_epi32(a, b);
4524 let e = _mm256_set1_epi32(0);
4525 assert_eq_m256i(r, e);
4526 }
4527
4528 #[simd_test(enable = "avx2")]
4529 unsafe fn test_mm256_hsubs_epi16() {
4530 let a = _mm256_set1_epi16(2);
4531 let a = _mm256_insert_epi16::<0>(a, 0x7fff);
4532 let a = _mm256_insert_epi16::<1>(a, -1);
4533 let b = _mm256_set1_epi16(4);
4534 let r = _mm256_hsubs_epi16(a, b);
4535 let e = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 0x7FFF);
4536 assert_eq_m256i(r, e);
4537 }
4538
4539 #[simd_test(enable = "avx2")]
4540 unsafe fn test_mm256_madd_epi16() {
4541 let a = _mm256_set1_epi16(2);
4542 let b = _mm256_set1_epi16(4);
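        // Each 32-bit result is the sum of two adjacent 16-bit products: 2*4 + 2*4 = 16.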
4543 let r = _mm256_madd_epi16(a, b);
4544 let e = _mm256_set1_epi32(16);
4545 assert_eq_m256i(r, e);
4546 }
4547
4548 #[simd_test(enable = "avx2")]
4549 unsafe fn test_mm256_inserti128_si256() {
4550 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4551 let b = _mm_setr_epi64x(7, 8);
4552 let r = _mm256_inserti128_si256::<1>(a, b);
4553 let e = _mm256_setr_epi64x(1, 2, 7, 8);
4554 assert_eq_m256i(r, e);
4555 }
4556
4557 #[simd_test(enable = "avx2")]
4558 unsafe fn test_mm256_maddubs_epi16() {
4559 let a = _mm256_set1_epi8(2);
4560 let b = _mm256_set1_epi8(4);
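        // Unsigned bytes of `a` are multiplied by signed bytes of `b`, and adjacent
        // products are summed into 16-bit lanes: 2*4 + 2*4 = 16.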
4561 let r = _mm256_maddubs_epi16(a, b);
4562 let e = _mm256_set1_epi16(16);
4563 assert_eq_m256i(r, e);
4564 }
4565
4566 #[simd_test(enable = "avx2")]
4567 unsafe fn test_mm_maskload_epi32() {
4568 let nums = [1, 2, 3, 4];
4569 let a = &nums as *const i32;
4570 let mask = _mm_setr_epi32(-1, 0, 0, -1);
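        // Only elements whose mask lane has the sign bit set are loaded;
        // the remaining lanes are zeroed.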
4571 let r = _mm_maskload_epi32(a, mask);
4572 let e = _mm_setr_epi32(1, 0, 0, 4);
4573 assert_eq_m128i(r, e);
4574 }
4575
4576 #[simd_test(enable = "avx2")]
4577 unsafe fn test_mm256_maskload_epi32() {
4578 let nums = [1, 2, 3, 4, 5, 6, 7, 8];
4579 let a = &nums as *const i32;
4580 let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4581 let r = _mm256_maskload_epi32(a, mask);
4582 let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
4583 assert_eq_m256i(r, e);
4584 }
4585
4586 #[simd_test(enable = "avx2")]
4587 unsafe fn test_mm_maskload_epi64() {
4588 let nums = [1_i64, 2_i64];
4589 let a = &nums as *const i64;
4590 let mask = _mm_setr_epi64x(0, -1);
4591 let r = _mm_maskload_epi64(a, mask);
4592 let e = _mm_setr_epi64x(0, 2);
4593 assert_eq_m128i(r, e);
4594 }
4595
4596 #[simd_test(enable = "avx2")]
4597 unsafe fn test_mm256_maskload_epi64() {
4598 let nums = [1_i64, 2_i64, 3_i64, 4_i64];
4599 let a = &nums as *const i64;
4600 let mask = _mm256_setr_epi64x(0, -1, -1, 0);
4601 let r = _mm256_maskload_epi64(a, mask);
4602 let e = _mm256_setr_epi64x(0, 2, 3, 0);
4603 assert_eq_m256i(r, e);
4604 }
4605
4606 #[simd_test(enable = "avx2")]
4607 unsafe fn test_mm_maskstore_epi32() {
4608 let a = _mm_setr_epi32(1, 2, 3, 4);
4609 let mut arr = [-1, -1, -1, -1];
4610 let mask = _mm_setr_epi32(-1, 0, 0, -1);
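        // Only elements whose mask lane has the sign bit set are written;
        // the other array slots keep their original -1.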
4611 _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4612 let e = [1, -1, -1, 4];
4613 assert_eq!(arr, e);
4614 }
4615
4616 #[simd_test(enable = "avx2")]
4617 unsafe fn test_mm256_maskstore_epi32() {
4618 let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
4619 let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
4620 let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4621 _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4622 let e = [1, -1, -1, 42, -1, 6, 7, -1];
4623 assert_eq!(arr, e);
4624 }
4625
4626 #[simd_test(enable = "avx2")]
4627 unsafe fn test_mm_maskstore_epi64() {
4628 let a = _mm_setr_epi64x(1_i64, 2_i64);
4629 let mut arr = [-1_i64, -1_i64];
4630 let mask = _mm_setr_epi64x(0, -1);
4631 _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4632 let e = [-1, 2];
4633 assert_eq!(arr, e);
4634 }
4635
4636 #[simd_test(enable = "avx2")]
4637 unsafe fn test_mm256_maskstore_epi64() {
4638 let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
4639 let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
4640 let mask = _mm256_setr_epi64x(0, -1, -1, 0);
4641 _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4642 let e = [-1, 2, 3, -1];
4643 assert_eq!(arr, e);
4644 }
4645
4646 #[simd_test(enable = "avx2")]
4647 unsafe fn test_mm256_max_epi16() {
4648 let a = _mm256_set1_epi16(2);
4649 let b = _mm256_set1_epi16(4);
4650 let r = _mm256_max_epi16(a, b);
4651 assert_eq_m256i(r, b);
4652 }
4653
4654 #[simd_test(enable = "avx2")]
4655 unsafe fn test_mm256_max_epi32() {
4656 let a = _mm256_set1_epi32(2);
4657 let b = _mm256_set1_epi32(4);
4658 let r = _mm256_max_epi32(a, b);
4659 assert_eq_m256i(r, b);
4660 }
4661
4662 #[simd_test(enable = "avx2")]
4663 unsafe fn test_mm256_max_epi8() {
4664 let a = _mm256_set1_epi8(2);
4665 let b = _mm256_set1_epi8(4);
4666 let r = _mm256_max_epi8(a, b);
4667 assert_eq_m256i(r, b);
4668 }
4669
4670 #[simd_test(enable = "avx2")]
4671 unsafe fn test_mm256_max_epu16() {
4672 let a = _mm256_set1_epi16(2);
4673 let b = _mm256_set1_epi16(4);
4674 let r = _mm256_max_epu16(a, b);
4675 assert_eq_m256i(r, b);
4676 }
4677
4678 #[simd_test(enable = "avx2")]
4679 unsafe fn test_mm256_max_epu32() {
4680 let a = _mm256_set1_epi32(2);
4681 let b = _mm256_set1_epi32(4);
4682 let r = _mm256_max_epu32(a, b);
4683 assert_eq_m256i(r, b);
4684 }
4685
4686 #[simd_test(enable = "avx2")]
4687 unsafe fn test_mm256_max_epu8() {
4688 let a = _mm256_set1_epi8(2);
4689 let b = _mm256_set1_epi8(4);
4690 let r = _mm256_max_epu8(a, b);
4691 assert_eq_m256i(r, b);
4692 }
4693
4694 #[simd_test(enable = "avx2")]
4695 unsafe fn test_mm256_min_epi16() {
4696 let a = _mm256_set1_epi16(2);
4697 let b = _mm256_set1_epi16(4);
4698 let r = _mm256_min_epi16(a, b);
4699 assert_eq_m256i(r, a);
4700 }
4701
4702 #[simd_test(enable = "avx2")]
4703 unsafe fn test_mm256_min_epi32() {
4704 let a = _mm256_set1_epi32(2);
4705 let b = _mm256_set1_epi32(4);
4706 let r = _mm256_min_epi32(a, b);
4707 assert_eq_m256i(r, a);
4708 }
4709
4710 #[simd_test(enable = "avx2")]
4711 unsafe fn test_mm256_min_epi8() {
4712 let a = _mm256_set1_epi8(2);
4713 let b = _mm256_set1_epi8(4);
4714 let r = _mm256_min_epi8(a, b);
4715 assert_eq_m256i(r, a);
4716 }
4717
4718 #[simd_test(enable = "avx2")]
4719 unsafe fn test_mm256_min_epu16() {
4720 let a = _mm256_set1_epi16(2);
4721 let b = _mm256_set1_epi16(4);
4722 let r = _mm256_min_epu16(a, b);
4723 assert_eq_m256i(r, a);
4724 }
4725
4726 #[simd_test(enable = "avx2")]
4727 unsafe fn test_mm256_min_epu32() {
4728 let a = _mm256_set1_epi32(2);
4729 let b = _mm256_set1_epi32(4);
4730 let r = _mm256_min_epu32(a, b);
4731 assert_eq_m256i(r, a);
4732 }
4733
4734 #[simd_test(enable = "avx2")]
4735 unsafe fn test_mm256_min_epu8() {
4736 let a = _mm256_set1_epi8(2);
4737 let b = _mm256_set1_epi8(4);
4738 let r = _mm256_min_epu8(a, b);
4739 assert_eq_m256i(r, a);
4740 }
4741
4742 #[simd_test(enable = "avx2")]
4743 unsafe fn test_mm256_movemask_epi8() {
4744 let a = _mm256_set1_epi8(-1);
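        // All 32 sign bits are set, so the packed mask is 0xFFFF_FFFF, i.e. -1 as i32.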
4745 let r = _mm256_movemask_epi8(a);
4746 let e = -1;
4747 assert_eq!(r, e);
4748 }
4749
4750 #[simd_test(enable = "avx2")]
4751 unsafe fn test_mm256_mpsadbw_epu8() {
4752 let a = _mm256_set1_epi8(2);
4753 let b = _mm256_set1_epi8(4);
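        // With control 0, each 16-bit result is the sum of four absolute
        // differences: 4 * |2 - 4| = 8.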
4754 let r = _mm256_mpsadbw_epu8::<0>(a, b);
4755 let e = _mm256_set1_epi16(8);
4756 assert_eq_m256i(r, e);
4757 }
4758
4759 #[simd_test(enable = "avx2")]
4760 unsafe fn test_mm256_mul_epi32() {
4761 let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
4762 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
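        // Only the even-indexed 32-bit elements are multiplied into 64-bit
        // products: 0*1 = 0, 0*3 = 0, 2*5 = 10, 2*7 = 14.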
4763 let r = _mm256_mul_epi32(a, b);
4764 let e = _mm256_setr_epi64x(0, 0, 10, 14);
4765 assert_eq_m256i(r, e);
4766 }
4767
4768 #[simd_test(enable = "avx2")]
4769 unsafe fn test_mm256_mul_epu32() {
4770 let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
4771 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4772 let r = _mm256_mul_epu32(a, b);
4773 let e = _mm256_setr_epi64x(0, 0, 10, 14);
4774 assert_eq_m256i(r, e);
4775 }
4776
4777 #[simd_test(enable = "avx2")]
4778 unsafe fn test_mm256_mulhi_epi16() {
4779 let a = _mm256_set1_epi16(6535);
4780 let b = _mm256_set1_epi16(6535);
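        // 6535 * 6535 = 42_706_225; the high 16 bits are 42_706_225 >> 16 = 651.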
4781 let r = _mm256_mulhi_epi16(a, b);
4782 let e = _mm256_set1_epi16(651);
4783 assert_eq_m256i(r, e);
4784 }
4785
4786 #[simd_test(enable = "avx2")]
4787 unsafe fn test_mm256_mulhi_epu16() {
4788 let a = _mm256_set1_epi16(6535);
4789 let b = _mm256_set1_epi16(6535);
4790 let r = _mm256_mulhi_epu16(a, b);
4791 let e = _mm256_set1_epi16(651);
4792 assert_eq_m256i(r, e);
4793 }
4794
4795 #[simd_test(enable = "avx2")]
4796 unsafe fn test_mm256_mullo_epi16() {
4797 let a = _mm256_set1_epi16(2);
4798 let b = _mm256_set1_epi16(4);
4799 let r = _mm256_mullo_epi16(a, b);
4800 let e = _mm256_set1_epi16(8);
4801 assert_eq_m256i(r, e);
4802 }
4803
4804 #[simd_test(enable = "avx2")]
4805 unsafe fn test_mm256_mullo_epi32() {
4806 let a = _mm256_set1_epi32(2);
4807 let b = _mm256_set1_epi32(4);
4808 let r = _mm256_mullo_epi32(a, b);
4809 let e = _mm256_set1_epi32(8);
4810 assert_eq_m256i(r, e);
4811 }
4812
4813 #[simd_test(enable = "avx2")]

4814 unsafe fn test_mm256_mulhrs_epi16() {
        let a = _mm256_set1_epi16(0x4000); // 0.5 in Q15 fixed point
        let b = _mm256_set1_epi16(0x6000); // 0.75 in Q15 fixed point
        // _mm256_mulhrs_epi16 computes (((a * b) >> 14) + 1) >> 1 per lane,
        // so 0.5 * 0.75 gives 0.375, i.e. 0x3000.
        let r = _mm256_mulhrs_epi16(a, b);
        let e = _mm256_set1_epi16(0x3000);
4819 assert_eq_m256i(r, e);
4820 }
4821
4822 #[simd_test(enable = "avx2")]
4823 unsafe fn test_mm256_or_si256() {
4824 let a = _mm256_set1_epi8(-1);
4825 let b = _mm256_set1_epi8(0);
4826 let r = _mm256_or_si256(a, b);
4827 assert_eq_m256i(r, a);
4828 }
4829
4830 #[simd_test(enable = "avx2")]
4831 unsafe fn test_mm256_packs_epi16() {
4832 let a = _mm256_set1_epi16(2);
4833 let b = _mm256_set1_epi16(4);
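        // Packing works per 128-bit lane: eight saturated bytes from `a`
        // followed by eight from `b`.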
4834 let r = _mm256_packs_epi16(a, b);
4835 #[rustfmt::skip]
4836 let e = _mm256_setr_epi8(
4837 2, 2, 2, 2, 2, 2, 2, 2,
4838 4, 4, 4, 4, 4, 4, 4, 4,
4839 2, 2, 2, 2, 2, 2, 2, 2,
4840 4, 4, 4, 4, 4, 4, 4, 4,
4841 );
4842
4843 assert_eq_m256i(r, e);
4844 }
4845
4846 #[simd_test(enable = "avx2")]
4847 unsafe fn test_mm256_packs_epi32() {
4848 let a = _mm256_set1_epi32(2);
4849 let b = _mm256_set1_epi32(4);
4850 let r = _mm256_packs_epi32(a, b);
4851 let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
4852
4853 assert_eq_m256i(r, e);
4854 }
4855
4856 #[simd_test(enable = "avx2")]
4857 unsafe fn test_mm256_packus_epi16() {
4858 let a = _mm256_set1_epi16(2);
4859 let b = _mm256_set1_epi16(4);
4860 let r = _mm256_packus_epi16(a, b);
4861 #[rustfmt::skip]
4862 let e = _mm256_setr_epi8(
4863 2, 2, 2, 2, 2, 2, 2, 2,
4864 4, 4, 4, 4, 4, 4, 4, 4,
4865 2, 2, 2, 2, 2, 2, 2, 2,
4866 4, 4, 4, 4, 4, 4, 4, 4,
4867 );
4868
4869 assert_eq_m256i(r, e);
4870 }
4871
4872 #[simd_test(enable = "avx2")]
4873 unsafe fn test_mm256_packus_epi32() {
4874 let a = _mm256_set1_epi32(2);
4875 let b = _mm256_set1_epi32(4);
4876 let r = _mm256_packus_epi32(a, b);
4877 let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
4878
4879 assert_eq_m256i(r, e);
4880 }
4881
4882 #[simd_test(enable = "avx2")]
4883 unsafe fn test_mm256_sad_epu8() {
4884 let a = _mm256_set1_epi8(2);
4885 let b = _mm256_set1_epi8(4);
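        // Each 64-bit result sums eight absolute differences: 8 * |2 - 4| = 16.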
4886 let r = _mm256_sad_epu8(a, b);
4887 let e = _mm256_set1_epi64x(16);
4888 assert_eq_m256i(r, e);
4889 }
4890
4891 #[simd_test(enable = "avx2")]
4892 unsafe fn test_mm256_shufflehi_epi16() {
4893 #[rustfmt::skip]
4894 let a = _mm256_setr_epi16(
4895 0, 1, 2, 3, 11, 22, 33, 44,
4896 4, 5, 6, 7, 55, 66, 77, 88,
4897 );
4898 #[rustfmt::skip]
4899 let e = _mm256_setr_epi16(
4900 0, 1, 2, 3, 44, 22, 22, 11,
4901 4, 5, 6, 7, 88, 66, 66, 55,
4902 );
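        // The immediate 0b00_01_01_11 picks elements 3, 1, 1, 0 of the upper
        // four words in each lane; the lower four words pass through unchanged.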
4903 let r = _mm256_shufflehi_epi16::<0b00_01_01_11>(a);
4904 assert_eq_m256i(r, e);
4905 }
4906
4907 #[simd_test(enable = "avx2")]
4908 unsafe fn test_mm256_shufflelo_epi16() {
4909 #[rustfmt::skip]
4910 let a = _mm256_setr_epi16(
4911 11, 22, 33, 44, 0, 1, 2, 3,
4912 55, 66, 77, 88, 4, 5, 6, 7,
4913 );
4914 #[rustfmt::skip]
4915 let e = _mm256_setr_epi16(
4916 44, 22, 22, 11, 0, 1, 2, 3,
4917 88, 66, 66, 55, 4, 5, 6, 7,
4918 );
4919 let r = _mm256_shufflelo_epi16::<0b00_01_01_11>(a);
4920 assert_eq_m256i(r, e);
4921 }
4922
4923 #[simd_test(enable = "avx2")]
4924 unsafe fn test_mm256_sign_epi16() {
4925 let a = _mm256_set1_epi16(2);
4926 let b = _mm256_set1_epi16(-1);
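        // Every element of `b` is negative, so every element of `a` is negated.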
4927 let r = _mm256_sign_epi16(a, b);
4928 let e = _mm256_set1_epi16(-2);
4929 assert_eq_m256i(r, e);
4930 }
4931
4932 #[simd_test(enable = "avx2")]
4933 unsafe fn test_mm256_sign_epi32() {
4934 let a = _mm256_set1_epi32(2);
4935 let b = _mm256_set1_epi32(-1);
4936 let r = _mm256_sign_epi32(a, b);
4937 let e = _mm256_set1_epi32(-2);
4938 assert_eq_m256i(r, e);
4939 }
4940
4941 #[simd_test(enable = "avx2")]
4942 unsafe fn test_mm256_sign_epi8() {
4943 let a = _mm256_set1_epi8(2);
4944 let b = _mm256_set1_epi8(-1);
4945 let r = _mm256_sign_epi8(a, b);
4946 let e = _mm256_set1_epi8(-2);
4947 assert_eq_m256i(r, e);
4948 }
4949
4950 #[simd_test(enable = "avx2")]
4951 unsafe fn test_mm256_sll_epi16() {
4952 let a = _mm256_set1_epi16(0xFF);
4953 let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
4954 let r = _mm256_sll_epi16(a, b);
4955 assert_eq_m256i(r, _mm256_set1_epi16(0xFF0));
4956 }
4957
4958 #[simd_test(enable = "avx2")]
4959 unsafe fn test_mm256_sll_epi32() {
4960 let a = _mm256_set1_epi32(0xFFFF);
4961 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
4962 let r = _mm256_sll_epi32(a, b);
4963 assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0));
4964 }
4965
4966 #[simd_test(enable = "avx2")]
4967 unsafe fn test_mm256_sll_epi64() {
4968 let a = _mm256_set1_epi64x(0xFFFFFFFF);
4969 let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4);
4970 let r = _mm256_sll_epi64(a, b);
4971 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0));
4972 }
4973
4974 #[simd_test(enable = "avx2")]
4975 unsafe fn test_mm256_slli_epi16() {
4976 assert_eq_m256i(
4977 _mm256_slli_epi16::<4>(_mm256_set1_epi16(0xFF)),
4978 _mm256_set1_epi16(0xFF0),
4979 );
4980 }
4981
4982 #[simd_test(enable = "avx2")]
4983 unsafe fn test_mm256_slli_epi32() {
4984 assert_eq_m256i(
4985 _mm256_slli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
4986 _mm256_set1_epi32(0xFFFF0),
4987 );
4988 }
4989
4990 #[simd_test(enable = "avx2")]
4991 unsafe fn test_mm256_slli_epi64() {
4992 assert_eq_m256i(
4993 _mm256_slli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
4994 _mm256_set1_epi64x(0xFFFFFFFF0),
4995 );
4996 }
4997
4998 #[simd_test(enable = "avx2")]
4999 unsafe fn test_mm256_slli_si256() {
5000 let a = _mm256_set1_epi64x(0xFFFFFFFF);
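        // Each 128-bit lane is shifted left by 3 bytes with zero fill, turning
        // every 0x00000000_FFFFFFFF element into 0x00FFFFFF_FF000000.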
5001 let r = _mm256_slli_si256::<3>(a);
5002 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
5003 }
5004
5005 #[simd_test(enable = "avx2")]
5006 unsafe fn test_mm_sllv_epi32() {
5007 let a = _mm_set1_epi32(2);
5008 let b = _mm_set1_epi32(1);
5009 let r = _mm_sllv_epi32(a, b);
5010 let e = _mm_set1_epi32(4);
5011 assert_eq_m128i(r, e);
5012 }
5013
5014 #[simd_test(enable = "avx2")]
5015 unsafe fn test_mm256_sllv_epi32() {
5016 let a = _mm256_set1_epi32(2);
5017 let b = _mm256_set1_epi32(1);
5018 let r = _mm256_sllv_epi32(a, b);
5019 let e = _mm256_set1_epi32(4);
5020 assert_eq_m256i(r, e);
5021 }
5022
5023 #[simd_test(enable = "avx2")]
5024 unsafe fn test_mm_sllv_epi64() {
5025 let a = _mm_set1_epi64x(2);
5026 let b = _mm_set1_epi64x(1);
5027 let r = _mm_sllv_epi64(a, b);
5028 let e = _mm_set1_epi64x(4);
5029 assert_eq_m128i(r, e);
5030 }
5031
5032 #[simd_test(enable = "avx2")]
5033 unsafe fn test_mm256_sllv_epi64() {
5034 let a = _mm256_set1_epi64x(2);
5035 let b = _mm256_set1_epi64x(1);
5036 let r = _mm256_sllv_epi64(a, b);
5037 let e = _mm256_set1_epi64x(4);
5038 assert_eq_m256i(r, e);
5039 }
5040
5041 #[simd_test(enable = "avx2")]
5042 unsafe fn test_mm256_sra_epi16() {
5043 let a = _mm256_set1_epi16(-1);
5044 let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
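        // Arithmetic right shift replicates the sign bit, so -1 >> 1 is still -1.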
5045 let r = _mm256_sra_epi16(a, b);
5046 assert_eq_m256i(r, _mm256_set1_epi16(-1));
5047 }
5048
5049 #[simd_test(enable = "avx2")]
5050 unsafe fn test_mm256_sra_epi32() {
5051 let a = _mm256_set1_epi32(-1);
5052 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1);
5053 let r = _mm256_sra_epi32(a, b);
5054 assert_eq_m256i(r, _mm256_set1_epi32(-1));
5055 }
5056
5057 #[simd_test(enable = "avx2")]
5058 unsafe fn test_mm256_srai_epi16() {
5059 assert_eq_m256i(
5060 _mm256_srai_epi16::<1>(_mm256_set1_epi16(-1)),
5061 _mm256_set1_epi16(-1),
5062 );
5063 }
5064
5065 #[simd_test(enable = "avx2")]
5066 unsafe fn test_mm256_srai_epi32() {
5067 assert_eq_m256i(
5068 _mm256_srai_epi32::<1>(_mm256_set1_epi32(-1)),
5069 _mm256_set1_epi32(-1),
5070 );
5071 }
5072
5073 #[simd_test(enable = "avx2")]
5074 unsafe fn test_mm_srav_epi32() {
5075 let a = _mm_set1_epi32(4);
5076 let count = _mm_set1_epi32(1);
5077 let r = _mm_srav_epi32(a, count);
5078 let e = _mm_set1_epi32(2);
5079 assert_eq_m128i(r, e);
5080 }
5081
5082 #[simd_test(enable = "avx2")]
5083 unsafe fn test_mm256_srav_epi32() {
5084 let a = _mm256_set1_epi32(4);
5085 let count = _mm256_set1_epi32(1);
5086 let r = _mm256_srav_epi32(a, count);
5087 let e = _mm256_set1_epi32(2);
5088 assert_eq_m256i(r, e);
5089 }
5090
5091 #[simd_test(enable = "avx2")]
5092 unsafe fn test_mm256_srli_si256() {
5093 #[rustfmt::skip]
5094 let a = _mm256_setr_epi8(
5095 1, 2, 3, 4, 5, 6, 7, 8,
5096 9, 10, 11, 12, 13, 14, 15, 16,
5097 17, 18, 19, 20, 21, 22, 23, 24,
5098 25, 26, 27, 28, 29, 30, 31, 32,
5099 );
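        // Each 128-bit lane is shifted right by 3 bytes, so the top three bytes
        // of every lane become zero.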
5100 let r = _mm256_srli_si256::<3>(a);
5101 #[rustfmt::skip]
5102 let e = _mm256_setr_epi8(
5103 4, 5, 6, 7, 8, 9, 10, 11,
5104 12, 13, 14, 15, 16, 0, 0, 0,
5105 20, 21, 22, 23, 24, 25, 26, 27,
5106 28, 29, 30, 31, 32, 0, 0, 0,
5107 );
5108 assert_eq_m256i(r, e);
5109 }
5110
5111 #[simd_test(enable = "avx2")]
5112 unsafe fn test_mm256_srl_epi16() {
5113 let a = _mm256_set1_epi16(0xFF);
5114 let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
5115 let r = _mm256_srl_epi16(a, b);
5116 assert_eq_m256i(r, _mm256_set1_epi16(0xF));
5117 }
5118
5119 #[simd_test(enable = "avx2")]
5120 unsafe fn test_mm256_srl_epi32() {
5121 let a = _mm256_set1_epi32(0xFFFF);
5122 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
5123 let r = _mm256_srl_epi32(a, b);
5124 assert_eq_m256i(r, _mm256_set1_epi32(0xFFF));
5125 }
5126
5127 #[simd_test(enable = "avx2")]
5128 unsafe fn test_mm256_srl_epi64() {
5129 let a = _mm256_set1_epi64x(0xFFFFFFFF);
5130 let b = _mm_setr_epi64x(4, 0);
5131 let r = _mm256_srl_epi64(a, b);
5132 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFF));
5133 }
5134
5135 #[simd_test(enable = "avx2")]
5136 unsafe fn test_mm256_srli_epi16() {
5137 assert_eq_m256i(
5138 _mm256_srli_epi16::<4>(_mm256_set1_epi16(0xFF)),
5139 _mm256_set1_epi16(0xF),
5140 );
5141 }
5142
5143 #[simd_test(enable = "avx2")]
5144 unsafe fn test_mm256_srli_epi32() {
5145 assert_eq_m256i(
5146 _mm256_srli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
5147 _mm256_set1_epi32(0xFFF),
5148 );
5149 }
5150
5151 #[simd_test(enable = "avx2")]
5152 unsafe fn test_mm256_srli_epi64() {
5153 assert_eq_m256i(
5154 _mm256_srli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
5155 _mm256_set1_epi64x(0xFFFFFFF),
5156 );
5157 }
5158
5159 #[simd_test(enable = "avx2")]
5160 unsafe fn test_mm_srlv_epi32() {
5161 let a = _mm_set1_epi32(2);
5162 let count = _mm_set1_epi32(1);
5163 let r = _mm_srlv_epi32(a, count);
5164 let e = _mm_set1_epi32(1);
5165 assert_eq_m128i(r, e);
5166 }
5167
5168 #[simd_test(enable = "avx2")]
5169 unsafe fn test_mm256_srlv_epi32() {
5170 let a = _mm256_set1_epi32(2);
5171 let count = _mm256_set1_epi32(1);
5172 let r = _mm256_srlv_epi32(a, count);
5173 let e = _mm256_set1_epi32(1);
5174 assert_eq_m256i(r, e);
5175 }
5176
5177 #[simd_test(enable = "avx2")]
5178 unsafe fn test_mm_srlv_epi64() {
5179 let a = _mm_set1_epi64x(2);
5180 let count = _mm_set1_epi64x(1);
5181 let r = _mm_srlv_epi64(a, count);
5182 let e = _mm_set1_epi64x(1);
5183 assert_eq_m128i(r, e);
5184 }
5185
5186 #[simd_test(enable = "avx2")]
5187 unsafe fn test_mm256_srlv_epi64() {
5188 let a = _mm256_set1_epi64x(2);
5189 let count = _mm256_set1_epi64x(1);
5190 let r = _mm256_srlv_epi64(a, count);
5191 let e = _mm256_set1_epi64x(1);
5192 assert_eq_m256i(r, e);
5193 }
5194
5195 #[simd_test(enable = "avx2")]
5196 unsafe fn test_mm256_sub_epi16() {
5197 let a = _mm256_set1_epi16(4);
5198 let b = _mm256_set1_epi16(2);
5199 let r = _mm256_sub_epi16(a, b);
5200 assert_eq_m256i(r, b);
5201 }
5202
5203 #[simd_test(enable = "avx2")]
5204 unsafe fn test_mm256_sub_epi32() {
5205 let a = _mm256_set1_epi32(4);
5206 let b = _mm256_set1_epi32(2);
5207 let r = _mm256_sub_epi32(a, b);
5208 assert_eq_m256i(r, b);
5209 }
5210
5211 #[simd_test(enable = "avx2")]
5212 unsafe fn test_mm256_sub_epi64() {
5213 let a = _mm256_set1_epi64x(4);
5214 let b = _mm256_set1_epi64x(2);
5215 let r = _mm256_sub_epi64(a, b);
5216 assert_eq_m256i(r, b);
5217 }
5218
5219 #[simd_test(enable = "avx2")]
5220 unsafe fn test_mm256_sub_epi8() {
5221 let a = _mm256_set1_epi8(4);
5222 let b = _mm256_set1_epi8(2);
5223 let r = _mm256_sub_epi8(a, b);
5224 assert_eq_m256i(r, b);
5225 }
5226
5227 #[simd_test(enable = "avx2")]
5228 unsafe fn test_mm256_subs_epi16() {
5229 let a = _mm256_set1_epi16(4);
5230 let b = _mm256_set1_epi16(2);
5231 let r = _mm256_subs_epi16(a, b);
5232 assert_eq_m256i(r, b);
5233 }
5234
5235 #[simd_test(enable = "avx2")]
5236 unsafe fn test_mm256_subs_epi8() {
5237 let a = _mm256_set1_epi8(4);
5238 let b = _mm256_set1_epi8(2);
5239 let r = _mm256_subs_epi8(a, b);
5240 assert_eq_m256i(r, b);
5241 }
5242
5243 #[simd_test(enable = "avx2")]
5244 unsafe fn test_mm256_subs_epu16() {
5245 let a = _mm256_set1_epi16(4);
5246 let b = _mm256_set1_epi16(2);
5247 let r = _mm256_subs_epu16(a, b);
5248 assert_eq_m256i(r, b);
5249 }
5250
5251 #[simd_test(enable = "avx2")]
5252 unsafe fn test_mm256_subs_epu8() {
5253 let a = _mm256_set1_epi8(4);
5254 let b = _mm256_set1_epi8(2);
5255 let r = _mm256_subs_epu8(a, b);
5256 assert_eq_m256i(r, b);
5257 }
5258
5259 #[simd_test(enable = "avx2")]
5260 unsafe fn test_mm256_xor_si256() {
5261 let a = _mm256_set1_epi8(5);
5262 let b = _mm256_set1_epi8(3);
5263 let r = _mm256_xor_si256(a, b);
5264 assert_eq_m256i(r, _mm256_set1_epi8(6));
5265 }
5266
5267 #[simd_test(enable = "avx2")]
5268 unsafe fn test_mm256_alignr_epi8() {
5269 #[rustfmt::skip]
5270 let a = _mm256_setr_epi8(
5271 1, 2, 3, 4, 5, 6, 7, 8,
5272 9, 10, 11, 12, 13, 14, 15, 16,
5273 17, 18, 19, 20, 21, 22, 23, 24,
5274 25, 26, 27, 28, 29, 30, 31, 32,
5275 );
5276 #[rustfmt::skip]
5277 let b = _mm256_setr_epi8(
5278 -1, -2, -3, -4, -5, -6, -7, -8,
5279 -9, -10, -11, -12, -13, -14, -15, -16,
5280 -17, -18, -19, -20, -21, -22, -23, -24,
5281 -25, -26, -27, -28, -29, -30, -31, -32,
5282 );
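        // For each 128-bit lane, alignr concatenates the corresponding lanes of
        // `a` and `b` into a 32-byte value and shifts it right by IMM8 bytes;
        // a shift of 33 bytes moves everything out, so the result is all zeros.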
5283 let r = _mm256_alignr_epi8::<33>(a, b);
5284 assert_eq_m256i(r, _mm256_set1_epi8(0));
5285
5286 let r = _mm256_alignr_epi8::<17>(a, b);
5287 #[rustfmt::skip]
5288 let expected = _mm256_setr_epi8(
5289 2, 3, 4, 5, 6, 7, 8, 9,
5290 10, 11, 12, 13, 14, 15, 16, 0,
5291 18, 19, 20, 21, 22, 23, 24, 25,
5292 26, 27, 28, 29, 30, 31, 32, 0,
5293 );
5294 assert_eq_m256i(r, expected);
5295
5296 let r = _mm256_alignr_epi8::<4>(a, b);
5297 #[rustfmt::skip]
5298 let expected = _mm256_setr_epi8(
5299 -5, -6, -7, -8, -9, -10, -11, -12,
5300 -13, -14, -15, -16, 1, 2, 3, 4,
5301 -21, -22, -23, -24, -25, -26, -27, -28,
5302 -29, -30, -31, -32, 17, 18, 19, 20,
5303 );
5304 assert_eq_m256i(r, expected);
5305
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            -1, -2, -3, -4, -5, -6, -7, -8,
            -9, -10, -11, -12, -13, -14, -15, -16,
            -17, -18, -19, -20, -21, -22, -23, -24,
            -25, -26, -27, -28, -29, -30, -31, -32,
        );
        let r = _mm256_alignr_epi8::<16>(a, b);
5314 assert_eq_m256i(r, expected);
5315
5316 let r = _mm256_alignr_epi8::<15>(a, b);
5317 #[rustfmt::skip]
5318 let expected = _mm256_setr_epi8(
5319 -16, 1, 2, 3, 4, 5, 6, 7,
5320 8, 9, 10, 11, 12, 13, 14, 15,
5321 -32, 17, 18, 19, 20, 21, 22, 23,
5322 24, 25, 26, 27, 28, 29, 30, 31,
5323 );
5324 assert_eq_m256i(r, expected);
5325
5326 let r = _mm256_alignr_epi8::<0>(a, b);
5327 assert_eq_m256i(r, b);
5328 }
5329
5330 #[simd_test(enable = "avx2")]
5331 unsafe fn test_mm256_shuffle_epi8() {
5332 #[rustfmt::skip]
5333 let a = _mm256_setr_epi8(
5334 1, 2, 3, 4, 5, 6, 7, 8,
5335 9, 10, 11, 12, 13, 14, 15, 16,
5336 17, 18, 19, 20, 21, 22, 23, 24,
5337 25, 26, 27, 28, 29, 30, 31, 32,
5338 );
5339 #[rustfmt::skip]
5340 let b = _mm256_setr_epi8(
5341 4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
5342 12, 5, 5, 10, 4, 1, 8, 0,
5343 4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
5344 12, 5, 5, 10, 4, 1, 8, 0,
5345 );
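        // Each byte of `b` indexes (via its low four bits) into the same
        // 128-bit lane of `a`; a byte with the high bit set (128) yields zero.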
5346 #[rustfmt::skip]
5347 let expected = _mm256_setr_epi8(
5348 5, 0, 5, 4, 9, 13, 7, 4,
5349 13, 6, 6, 11, 5, 2, 9, 1,
5350 21, 0, 21, 20, 25, 29, 23, 20,
5351 29, 22, 22, 27, 21, 18, 25, 17,
5352 );
5353 let r = _mm256_shuffle_epi8(a, b);
5354 assert_eq_m256i(r, expected);
5355 }
5356
5357 #[simd_test(enable = "avx2")]
5358 unsafe fn test_mm256_permutevar8x32_epi32() {
5359 let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
5360 let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
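        // `b` holds per-element indices into the eight 32-bit elements of `a`.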
5361 let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500);
5362 let r = _mm256_permutevar8x32_epi32(a, b);
5363 assert_eq_m256i(r, expected);
5364 }
5365
5366 #[simd_test(enable = "avx2")]
5367 unsafe fn test_mm256_permute4x64_epi64() {
5368 let a = _mm256_setr_epi64x(100, 200, 300, 400);
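        // Each 2-bit field of the immediate selects a source element:
        // 0b00_01_00_11 picks elements 3, 0, 1, 0.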
5369 let expected = _mm256_setr_epi64x(400, 100, 200, 100);
5370 let r = _mm256_permute4x64_epi64::<0b00010011>(a);
5371 assert_eq_m256i(r, expected);
5372 }
5373
5374 #[simd_test(enable = "avx2")]
5375 unsafe fn test_mm256_permute2x128_si256() {
5376 let a = _mm256_setr_epi64x(100, 200, 500, 600);
5377 let b = _mm256_setr_epi64x(300, 400, 700, 800);
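        // Bits 1:0 of the control (0b11) select the high half of `b` for the low
        // 128 bits of the result; bits 5:4 (0b01) select the high half of `a`
        // for the upper 128 bits.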
5378 let r = _mm256_permute2x128_si256::<0b00_01_00_11>(a, b);
5379 let e = _mm256_setr_epi64x(700, 800, 500, 600);
5380 assert_eq_m256i(r, e);
5381 }
5382
5383 #[simd_test(enable = "avx2")]
5384 unsafe fn test_mm256_permute4x64_pd() {
5385 let a = _mm256_setr_pd(1., 2., 3., 4.);
5386 let r = _mm256_permute4x64_pd::<0b00_01_00_11>(a);
5387 let e = _mm256_setr_pd(4., 1., 2., 1.);
5388 assert_eq_m256d(r, e);
5389 }
5390
5391 #[simd_test(enable = "avx2")]
5392 unsafe fn test_mm256_permutevar8x32_ps() {
5393 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
5394 let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
5395 let r = _mm256_permutevar8x32_ps(a, b);
5396 let e = _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.);
5397 assert_eq_m256(r, e);
5398 }
5399
5400 #[simd_test(enable = "avx2")]
5401 unsafe fn test_mm_i32gather_epi32() {
5402 let mut arr = [0i32; 128];
5403 for i in 0..128i32 {
5404 arr[i as usize] = i;
5405 }
5406 // A multiplier of 4 is word-addressing
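        // (each i32 index becomes a byte offset of index * 4, so index 16 reads arr[16])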
5407 let r = _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5408 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
5409 }
5410
5411 #[simd_test(enable = "avx2")]
5412 unsafe fn test_mm_mask_i32gather_epi32() {
5413 let mut arr = [0i32; 128];
5414 for i in 0..128i32 {
5415 arr[i as usize] = i;
5416 }
5417 // A multiplier of 4 is word-addressing
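        // Lanes whose mask element has its sign bit set are gathered from memory;
        // the remaining lanes take the corresponding element of the source vector (256 here).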
5418 let r = _mm_mask_i32gather_epi32::<4>(
5419 _mm_set1_epi32(256),
5420 arr.as_ptr(),
5421 _mm_setr_epi32(0, 16, 64, 96),
5422 _mm_setr_epi32(-1, -1, -1, 0),
5423 );
5424 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
5425 }
5426
5427 #[simd_test(enable = "avx2")]
5428 unsafe fn test_mm256_i32gather_epi32() {
5429 let mut arr = [0i32; 128];
5430 for i in 0..128i32 {
5431 arr[i as usize] = i;
5432 }
5433 // A multiplier of 4 is word-addressing
5434 let r =
5435 _mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5436 assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5437 }
5438
5439 #[simd_test(enable = "avx2")]
5440 unsafe fn test_mm256_mask_i32gather_epi32() {
5441 let mut arr = [0i32; 128];
5442 for i in 0..128i32 {
5443 arr[i as usize] = i;
5444 }
5445 // A multiplier of 4 is word-addressing
5446 let r = _mm256_mask_i32gather_epi32::<4>(
5447 _mm256_set1_epi32(256),
5448 arr.as_ptr(),
5449 _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
5450 _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
5451 );
5452 assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
5453 }
5454
5455 #[simd_test(enable = "avx2")]
5456 unsafe fn test_mm_i32gather_ps() {
5457 let mut arr = [0.0f32; 128];
5458 let mut j = 0.0;
5459 for i in 0..128usize {
5460 arr[i] = j;
5461 j += 1.0;
5462 }
5463 // A multiplier of 4 is word-addressing for f32s
5464 let r = _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5465 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
5466 }
5467
5468 #[simd_test(enable = "avx2")]
5469 unsafe fn test_mm_mask_i32gather_ps() {
5470 let mut arr = [0.0f32; 128];
5471 let mut j = 0.0;
5472 for i in 0..128usize {
5473 arr[i] = j;
5474 j += 1.0;
5475 }
5476 // A multiplier of 4 is word-addressing for f32s
5477 let r = _mm_mask_i32gather_ps::<4>(
5478 _mm_set1_ps(256.0),
5479 arr.as_ptr(),
5480 _mm_setr_epi32(0, 16, 64, 96),
5481 _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
5482 );
5483 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
5484 }
5485
5486 #[simd_test(enable = "avx2")]
5487 unsafe fn test_mm256_i32gather_ps() {
5488 let mut arr = [0.0f32; 128];
5489 let mut j = 0.0;
5490 for i in 0..128usize {
5491 arr[i] = j;
5492 j += 1.0;
5493 }
5494 // A multiplier of 4 is word-addressing for f32s
5495 let r =
5496 _mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5497 assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
5498 }
5499
5500 #[simd_test(enable = "avx2")]
5501 unsafe fn test_mm256_mask_i32gather_ps() {
5502 let mut arr = [0.0f32; 128];
5503 let mut j = 0.0;
5504 for i in 0..128usize {
5505 arr[i] = j;
5506 j += 1.0;
5507 }
5508 // A multiplier of 4 is word-addressing for f32s
5509 let r = _mm256_mask_i32gather_ps::<4>(
5510 _mm256_set1_ps(256.0),
5511 arr.as_ptr(),
5512 _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
5513 _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
5514 );
5515 assert_eq_m256(
5516 r,
5517 _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
5518 );
5519 }
5520
5521 #[simd_test(enable = "avx2")]
5522 unsafe fn test_mm_i32gather_epi64() {
5523 let mut arr = [0i64; 128];
5524 for i in 0..128i64 {
5525 arr[i as usize] = i;
5526 }
5527 // A multiplier of 8 is word-addressing for i64s
5528 let r = _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
5529 assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
5530 }
5531
5532 #[simd_test(enable = "avx2")]
5533 unsafe fn test_mm_mask_i32gather_epi64() {
5534 let mut arr = [0i64; 128];
5535 for i in 0..128i64 {
5536 arr[i as usize] = i;
5537 }
5538 // A multiplier of 8 is word-addressing for i64s
5539 let r = _mm_mask_i32gather_epi64::<8>(
5540 _mm_set1_epi64x(256),
5541 arr.as_ptr(),
5542 _mm_setr_epi32(16, 16, 16, 16),
5543 _mm_setr_epi64x(-1, 0),
5544 );
5545 assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
5546 }
5547
5548 #[simd_test(enable = "avx2")]
5549 unsafe fn test_mm256_i32gather_epi64() {
5550 let mut arr = [0i64; 128];
5551 for i in 0..128i64 {
5552 arr[i as usize] = i;
5553 }
5554 // A multiplier of 8 is word-addressing for i64s
5555 let r = _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5556 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
5557 }
5558
5559 #[simd_test(enable = "avx2")]
5560 unsafe fn test_mm256_mask_i32gather_epi64() {
5561 let mut arr = [0i64; 128];
5562 for i in 0..128i64 {
5563 arr[i as usize] = i;
5564 }
5565 // A multiplier of 8 is word-addressing for i64s
5566 let r = _mm256_mask_i32gather_epi64::<8>(
5567 _mm256_set1_epi64x(256),
5568 arr.as_ptr(),
5569 _mm_setr_epi32(0, 16, 64, 96),
5570 _mm256_setr_epi64x(-1, -1, -1, 0),
5571 );
5572 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
5573 }
5574
5575 #[simd_test(enable = "avx2")]
5576 unsafe fn test_mm_i32gather_pd() {
5577 let mut arr = [0.0f64; 128];
5578 let mut j = 0.0;
5579 for i in 0..128usize {
5580 arr[i] = j;
5581 j += 1.0;
5582 }
5583 // A multiplier of 8 is word-addressing for f64s
5584 let r = _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
5585 assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
5586 }
5587
5588 #[simd_test(enable = "avx2")]
5589 unsafe fn test_mm_mask_i32gather_pd() {
5590 let mut arr = [0.0f64; 128];
5591 let mut j = 0.0;
5592 for i in 0..128usize {
5593 arr[i] = j;
5594 j += 1.0;
5595 }
5596 // A multiplier of 8 is word-addressing for f64s
5597 let r = _mm_mask_i32gather_pd::<8>(
5598 _mm_set1_pd(256.0),
5599 arr.as_ptr(),
5600 _mm_setr_epi32(16, 16, 16, 16),
5601 _mm_setr_pd(-1.0, 0.0),
5602 );
5603 assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
5604 }
5605
5606 #[simd_test(enable = "avx2")]
5607 unsafe fn test_mm256_i32gather_pd() {
5608 let mut arr = [0.0f64; 128];
5609 let mut j = 0.0;
5610 for i in 0..128usize {
5611 arr[i] = j;
5612 j += 1.0;
5613 }
5614 // A multiplier of 8 is word-addressing for f64s
5615 let r = _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5616 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
5617 }
5618
5619 #[simd_test(enable = "avx2")]
5620 unsafe fn test_mm256_mask_i32gather_pd() {
5621 let mut arr = [0.0f64; 128];
5622 let mut j = 0.0;
5623 for i in 0..128usize {
5624 arr[i] = j;
5625 j += 1.0;
5626 }
5627 // A multiplier of 8 is word-addressing for f64s
5628 let r = _mm256_mask_i32gather_pd::<8>(
5629 _mm256_set1_pd(256.0),
5630 arr.as_ptr(),
5631 _mm_setr_epi32(0, 16, 64, 96),
5632 _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
5633 );
5634 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
5635 }
5636
5637 #[simd_test(enable = "avx2")]
5638 unsafe fn test_mm_i64gather_epi32() {
5639 let mut arr = [0i32; 128];
5640 for i in 0..128i32 {
5641 arr[i as usize] = i;
5642 }
5643 // A multiplier of 4 is word-addressing
5644 let r = _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5645 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
5646 }
5647
5648 #[simd_test(enable = "avx2")]
5649 unsafe fn test_mm_mask_i64gather_epi32() {
5650 let mut arr = [0i32; 128];
5651 for i in 0..128i32 {
5652 arr[i as usize] = i;
5653 }
5654 // A multiplier of 4 is word-addressing
5655 let r = _mm_mask_i64gather_epi32::<4>(
5656 _mm_set1_epi32(256),
5657 arr.as_ptr(),
5658 _mm_setr_epi64x(0, 16),
5659 _mm_setr_epi32(-1, 0, -1, 0),
5660 );
5661 assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0));
5662 }
5663
5664 #[simd_test(enable = "avx2")]
5665 unsafe fn test_mm256_i64gather_epi32() {
5666 let mut arr = [0i32; 128];
5667 for i in 0..128i32 {
5668 arr[i as usize] = i;
5669 }
5670 // A multiplier of 4 is word-addressing
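        // Four 64-bit indices gather four i32 values, so the result is a 128-bit vector.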
5671 let r = _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5672 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
5673 }
5674
5675 #[simd_test(enable = "avx2")]
5676 unsafe fn test_mm256_mask_i64gather_epi32() {
5677 let mut arr = [0i32; 128];
5678 for i in 0..128i32 {
5679 arr[i as usize] = i;
5680 }
5681 // A multiplier of 4 is word-addressing
5682 let r = _mm256_mask_i64gather_epi32::<4>(
5683 _mm_set1_epi32(256),
5684 arr.as_ptr(),
5685 _mm256_setr_epi64x(0, 16, 64, 96),
5686 _mm_setr_epi32(-1, -1, -1, 0),
5687 );
5688 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
5689 }
5690
5691 #[simd_test(enable = "avx2")]
5692 unsafe fn test_mm_i64gather_ps() {
5693 let mut arr = [0.0f32; 128];
5694 let mut j = 0.0;
5695 for i in 0..128usize {
5696 arr[i] = j;
5697 j += 1.0;
5698 }
5699 // A multiplier of 4 is word-addressing for f32s
5700 let r = _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5701 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
5702 }
5703
5704 #[simd_test(enable = "avx2")]
5705 unsafe fn test_mm_mask_i64gather_ps() {
5706 let mut arr = [0.0f32; 128];
5707 let mut j = 0.0;
5708 for i in 0..128usize {
5709 arr[i] = j;
5710 j += 1.0;
5711 }
5712 // A multiplier of 4 is word-addressing for f32s
5713 let r = _mm_mask_i64gather_ps::<4>(
5714 _mm_set1_ps(256.0),
5715 arr.as_ptr(),
5716 _mm_setr_epi64x(0, 16),
5717 _mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
5718 );
5719 assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
5720 }
5721
5722 #[simd_test(enable = "avx2")]
5723 unsafe fn test_mm256_i64gather_ps() {
5724 let mut arr = [0.0f32; 128];
5725 let mut j = 0.0;
5726 for i in 0..128usize {
5727 arr[i] = j;
5728 j += 1.0;
5729 }
5730 // A multiplier of 4 is word-addressing for f32s
5731 let r = _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5732 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
5733 }
5734
5735 #[simd_test(enable = "avx2")]
5736 unsafe fn test_mm256_mask_i64gather_ps() {
5737 let mut arr = [0.0f32; 128];
5738 let mut j = 0.0;
5739 for i in 0..128usize {
5740 arr[i] = j;
5741 j += 1.0;
5742 }
5743 // A multiplier of 4 is word-addressing for f32s
5744 let r = _mm256_mask_i64gather_ps::<4>(
5745 _mm_set1_ps(256.0),
5746 arr.as_ptr(),
5747 _mm256_setr_epi64x(0, 16, 64, 96),
5748 _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
5749 );
5750 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
5751 }
5752
5753 #[simd_test(enable = "avx2")]
5754 unsafe fn test_mm_i64gather_epi64() {
5755 let mut arr = [0i64; 128];
5756 for i in 0..128i64 {
5757 arr[i as usize] = i;
5758 }
5759 // A multiplier of 8 is word-addressing for i64s
5760 let r = _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5761 assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
5762 }
5763
5764 #[simd_test(enable = "avx2")]
5765 unsafe fn test_mm_mask_i64gather_epi64() {
5766 let mut arr = [0i64; 128];
5767 for i in 0..128i64 {
5768 arr[i as usize] = i;
5769 }
5770 // A multiplier of 8 is word-addressing for i64s
5771 let r = _mm_mask_i64gather_epi64::<8>(
5772 _mm_set1_epi64x(256),
5773 arr.as_ptr(),
5774 _mm_setr_epi64x(16, 16),
5775 _mm_setr_epi64x(-1, 0),
5776 );
5777 assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
5778 }
5779
5780 #[simd_test(enable = "avx2")]
5781 unsafe fn test_mm256_i64gather_epi64() {
5782 let mut arr = [0i64; 128];
5783 for i in 0..128i64 {
5784 arr[i as usize] = i;
5785 }
5786 // A multiplier of 8 is word-addressing for i64s
5787 let r = _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5788 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
5789 }
5790
5791 #[simd_test(enable = "avx2")]
5792 unsafe fn test_mm256_mask_i64gather_epi64() {
5793 let mut arr = [0i64; 128];
5794 for i in 0..128i64 {
5795 arr[i as usize] = i;
5796 }
5797 // A multiplier of 8 is word-addressing for i64s
5798 let r = _mm256_mask_i64gather_epi64::<8>(
5799 _mm256_set1_epi64x(256),
5800 arr.as_ptr(),
5801 _mm256_setr_epi64x(0, 16, 64, 96),
5802 _mm256_setr_epi64x(-1, -1, -1, 0),
5803 );
5804 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
5805 }
5806
5807 #[simd_test(enable = "avx2")]
5808 unsafe fn test_mm_i64gather_pd() {
5809 let mut arr = [0.0f64; 128];
5810 let mut j = 0.0;
5811 for i in 0..128usize {
5812 arr[i] = j;
5813 j += 1.0;
5814 }
5815 // A multiplier of 8 is word-addressing for f64s
5816 let r = _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5817 assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
5818 }
5819
5820 #[simd_test(enable = "avx2")]
5821 unsafe fn test_mm_mask_i64gather_pd() {
5822 let mut arr = [0.0f64; 128];
5823 let mut j = 0.0;
5824 for i in 0..128usize {
5825 arr[i] = j;
5826 j += 1.0;
5827 }
5828 // A multiplier of 8 is word-addressing for f64s
5829 let r = _mm_mask_i64gather_pd::<8>(
5830 _mm_set1_pd(256.0),
5831 arr.as_ptr(),
5832 _mm_setr_epi64x(16, 16),
5833 _mm_setr_pd(-1.0, 0.0),
5834 );
5835 assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
5836 }
5837
5838 #[simd_test(enable = "avx2")]
5839 unsafe fn test_mm256_i64gather_pd() {
5840 let mut arr = [0.0f64; 128];
5841 let mut j = 0.0;
5842 for i in 0..128usize {
5843 arr[i] = j;
5844 j += 1.0;
5845 }
5846 // A multiplier of 8 is word-addressing for f64s
5847 let r = _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5848 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
5849 }
5850
5851 #[simd_test(enable = "avx2")]
5852 unsafe fn test_mm256_mask_i64gather_pd() {
5853 let mut arr = [0.0f64; 128];
5854 let mut j = 0.0;
5855 for i in 0..128usize {
5856 arr[i] = j;
5857 j += 1.0;
5858 }
5859 // A multiplier of 8 is word-addressing for f64s
5860 let r = _mm256_mask_i64gather_pd::<8>(
5861 _mm256_set1_pd(256.0),
5862 arr.as_ptr(),
5863 _mm256_setr_epi64x(0, 16, 64, 96),
5864 _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
5865 );
5866 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
5867 }
5868
    #[simd_test(enable = "avx2")]
5870 unsafe fn test_mm256_extract_epi8() {
5871 #[rustfmt::skip]
5872 let a = _mm256_setr_epi8(
5873 -1, 1, 2, 3, 4, 5, 6, 7,
5874 8, 9, 10, 11, 12, 13, 14, 15,
5875 16, 17, 18, 19, 20, 21, 22, 23,
5876 24, 25, 26, 27, 28, 29, 30, 31
5877 );
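        // Element 0 is -1 (byte 0xFF); the extracted byte is zero-extended to i32,
        // hence the expected 0xFF rather than -1.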
5878 let r1 = _mm256_extract_epi8::<0>(a);
5879 let r2 = _mm256_extract_epi8::<3>(a);
5880 assert_eq!(r1, 0xFF);
5881 assert_eq!(r2, 3);
5882 }
5883
5884 #[simd_test(enable = "avx2")]
5885 unsafe fn test_mm256_extract_epi16() {
5886 #[rustfmt::skip]
5887 let a = _mm256_setr_epi16(
5888 -1, 1, 2, 3, 4, 5, 6, 7,
5889 8, 9, 10, 11, 12, 13, 14, 15,
5890 );
5891 let r1 = _mm256_extract_epi16::<0>(a);
5892 let r2 = _mm256_extract_epi16::<3>(a);
5893 assert_eq!(r1, 0xFFFF);
5894 assert_eq!(r2, 3);
5895 }
5896
5897 #[simd_test(enable = "avx2")]
5898 unsafe fn test_mm256_extract_epi32() {
5899 let a = _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7);
5900 let r1 = _mm256_extract_epi32::<0>(a);
5901 let r2 = _mm256_extract_epi32::<3>(a);
5902 assert_eq!(r1, -1);
5903 assert_eq!(r2, 3);
5904 }
5905
5906 #[simd_test(enable = "avx2")]
5907 unsafe fn test_mm256_cvtsd_f64() {
5908 let a = _mm256_setr_pd(1., 2., 3., 4.);
5909 let r = _mm256_cvtsd_f64(a);
5910 assert_eq!(r, 1.);
5911 }
5912
5913 #[simd_test(enable = "avx2")]
5914 unsafe fn test_mm256_cvtsi256_si32() {
5915 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
5916 let r = _mm256_cvtsi256_si32(a);
5917 assert_eq!(r, 1);
5918 }
5919}
5920